includes:
in_header: preamble.tex
library(ggplot2)
library(plm)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plm':
##
## between, lag, lead
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(writexl)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(readr)
library(nlme)
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
library(stringr)
library(broom)
library(tidyr)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(PerformanceAnalytics)
## Loading required package: xts
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
library(Metrics)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(glue)
EFPIA/ Europe
EFPIA R&D spending by country by year in euros
From yearly R&D reports 2004: Link Removed. Screenshot can be
found in original data folder
2005: https://www.efpia.eu/media/15485/the-pharmaceutical-industry-in-figures-edition-2007.pdf
2006: https://www.efpia.eu/media/15486/the-pharmaceutical-industry-in-figures-edition-2008.pdf
2007: https://www.pharmine.eu/wp-content/uploads/2014/04/The_Pharma_Industry_in_figures_Edition_2009-20080612-009-EN-v1.pdf
2008: https://www.sfee.gr/wp-content/uploads/2015/05/Figures_2010_Final-20100611-001-EN-v1_0.pdf
2009: https://www.efpia.eu/media/15489/the-pharmaceutical-industry-in-figures-edition-2011.pdf
2010: https://www.sfee.gr/wp-content/uploads/2015/05/EFPIA_Figures_2012_Final.pdf
Data for 2011 could not be found
2012: https://www.phar-in.eu/wp-content/uploads/2014/05/Figures_2014_Final.pdf
2013: https://www.efpia.eu/media/25822/2015-the-pharmaceutical-industry-in-figures.pdf
2014: https://www.efpia.eu/media/25055/the-pharmaceutical-industry-in-figures-june-2016.pdf
2015: https://www.efpia.eu/media/219735/efpia-pharmafigures2017_statisticbroch_v04-final.pdf
2016: https://www.efpia.eu/media/361960/efpia-pharmafigures2018_v07-hq.pdf
2017: https://www.efpia.eu/media/412931/the-pharmaceutical-industry-in-figures-2019.pdf
2018: https://www.efpia.eu/media/554521/efpia_pharmafigures_2020_web.pdf
2019: https://www.efpia.eu/media/602709/the-pharmaceutical-industry-in-figures-2021.pdf
2020: https://www.efpia.eu/media/637143/the-pharmaceutical-industry-in-figures-2022.pdf
2021: https://www.efpia.eu/media/rm4kzdlx/the-pharmaceutical-industry-in-figures-2023.pdf
These can also be found in the the Original Data Folder
efpia_rd <- read.csv("Cleaned Data/Area Level R&D Spending/EFPIA R&D Spending by Country in EU.csv")
efpia_rd <- clean_names(efpia_rd)
efpia_rd
Formatting - removing commas, making dollars numeric
efpia_rd$r_d_spending_millions_euros <- gsub(",", "", efpia_rd$r_d_spending_millions_euros)
efpia_rd$r_d_spending_millions_euros <- as.numeric(efpia_rd$r_d_spending_millions_euros)
## Warning: NAs introduced by coercion
efpia_rd
Sum EU R&D spending by year over countries to get total
spending
efpia_rd_totals <- efpia_rd %>%
group_by(year) %>%
summarise(total_mil_euros = sum(r_d_spending_millions_euros, na.rm = TRUE))
efpia_rd_totals
Imputing 2011 with average of 2011 and 2012 efpia_rd_totals
rd_2012 <- efpia_rd_totals[efpia_rd_totals$year == 2012, ]$total_mil_euros
rd_2010 <- efpia_rd_totals[efpia_rd_totals$year == 2010, ]$total_mil_euros
avg_2010_2012 <- (rd_2010 + rd_2012) / 2
efpia_rd_totals <- rbind(efpia_rd_totals, c(2011, avg_2010_2012))
efpia_rd_totals <- efpia_rd_totals[order(efpia_rd_totals$year),]
efpia_rd_totals
Euros to Dollars https://www.macrotrends.net/2548/euro-dollar-exchange-rate-historical-chart
eurotousdavgconversion <- read_xlsx("Original Data/Euro to USD Conversion Avg by Year.xlsx")
eurotousdavgconversion
eurotodollars <- as.data.frame(cbind(eurotousdavgconversion$Year, eurotousdavgconversion$`Average Closing Price`))
colnames(eurotodollars) <- c("year", "euros_to_dollar_conversion")
eurotodollars
Multiply euros by conversion rate to get r&d spending in dollars,
transform to billions of dollars
rd_conversion_merged_df <- merge(efpia_rd_totals, eurotodollars, by = "year")
rd_conversion_merged_df
efpia_rd_totals$total_rd_mil_dollars <- rd_conversion_merged_df$euros_to_dollar_conversion * rd_conversion_merged_df$total_mil_euros
efpia_rd_totals$annual_domestic_RD_spending_bil_dollars <- efpia_rd_totals$total_rd_mil_dollars /1000
efpia_rd_totals
EU orphan drug market authorizations
EU Orphan Drugs were scraped from the pdf: https://www.orpha.net/pdfs/orphacom/cahiers/docs/GB/list_of_orphan_drugs_in_europe.pdf
It can also be found in the the Original Data Folder
Combines lists:
List of orphan medicinal products in Europe with European orphan
designation and European marketing authorization
Annex 1: Orphan medicinal products withdrawn from the European
Community Register of orphan medicinal products
Annex 2: Orphan medicinal products withdrawn from use in the
European Union
eu_orphan_drugs <- read_xlsx("Cleaned Data/Area Level Orphan Authorizations/EU_Orphan_w_Company from https___www.orpha.net_orphacom_cahiers_docs_GB_list_of_orphan_drugs_in_europe.xlsx")
eu_orphan_drugs <- clean_names(eu_orphan_drugs)
eu_orphan_drugs <- subset(eu_orphan_drugs, market_authorization_year < 2022)
eu_orphan_drugs
Check for duplicated rows
eu_orphan_drugs[duplicated(eu_orphan_drugs),]
Number of MAs per company in EU - note companies may go by multiple
names
eu_company_freq <- eu_orphan_drugs %>%
count(firm) %>%
arrange(desc(n))
colnames(eu_company_freq) <- c("Firm", "Market Authorization Count")
eu_company_freq
EU number of market authorizations for each drug
eu_orphan_drugs$tradename <- tolower(eu_orphan_drugs$tradename)
eu_ma_freq <- eu_orphan_drugs %>%
count(tradename) %>%
arrange(desc(n))
colnames(eu_ma_freq) <- c("Trade Name", "Market Authorizations")
eu_ma_freq
How many ODs with more than 1 authorization in EU
nrow(eu_ma_freq[eu_ma_freq$`Market Authorizations` > 1, ])
## [1] 9
eu_ma_freq$`Trade Name` <- str_to_title(eu_ma_freq$`Trade Name`)
eu_ma_freq[eu_ma_freq$`Market Authorizations` > 1, ]
Export to table
stargazer(eu_ma_freq[1:9,], title= "EU Market Authorization Counts for Orphan Drugs with Over 1 Market Authorization", rownames = FALSE, align=TRUE, summary=FALSE, type = "html", out="Final Figures Formatted/EU_OD_Multiple_MAs.htm")
##
## <table style="text-align:center"><caption><strong>EU Market Authorization Counts for Orphan Drugs with Over 1 Market Authorization</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>Trade Name</td><td>Market Authorizations</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>Glivec</td><td>6</td></tr>
## <tr><td>Nexavar</td><td>3</td></tr>
## <tr><td>Revlimid</td><td>3</td></tr>
## <tr><td>Carbaglu</td><td>2</td></tr>
## <tr><td>Soliris</td><td>2</td></tr>
## <tr><td>Torisel</td><td>2</td></tr>
## <tr><td>Tracleer</td><td>2</td></tr>
## <tr><td>Yondelis</td><td>2</td></tr>
## <tr><td>Zavesca</td><td>2</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr></table>
Look at some instances where there are multiple market authorizations
for the same drug
eu_orphan_drugs[eu_orphan_drugs$tradename == "glivec",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "nexavar",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "revlimid",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "carbaglu",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "soliris",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "torisel",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "tracleer",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "yondelis",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "zavesca",]
Select only new drugs in the EU
eu_orphan_drugs_new <- eu_orphan_drugs %>%
group_by(tradename) %>%
slice_min(order_by = market_authorization_year, n = 1, with_ties = FALSE) %>% # Select the entry with the earliest market authorization year
ungroup()
eu_orphan_drugs_new
Count new EU orphan drug market authorizations by year
eu_new_od_ma_freqs <- as.data.frame(table(eu_orphan_drugs_new$market_authorization_year))
eu_new_od_ma_freqs
colnames(eu_new_od_ma_freqs) <- c("Year", "Freq New Orphan Drug MAs")
eu_new_od_ma_freqs
eu_new_od_ma_freqs$Year <- as.character(eu_new_od_ma_freqs$Year)
eu_new_od_ma_freqs$Year <- as.numeric(eu_new_od_ma_freqs$Year)
eu_new_od_ma_freqs <- eu_new_od_ma_freqs[order(-eu_new_od_ma_freqs$Year),]
eu_new_od_ma_freqs
Select all MAs in the EU
Count EU market authorizations by year
eu_od_ma_freqs <- as.data.frame(table(eu_orphan_drugs$market_authorization_year))
colnames(eu_od_ma_freqs) <- c("Year", "Freq Orphan Drug MAs")
eu_od_ma_freqs
eu_od_ma_freqs$Year <- as.character(eu_od_ma_freqs$Year)
eu_od_ma_freqs$Year <- as.numeric(eu_od_ma_freqs$Year)
eu_od_ma_freqs <- eu_od_ma_freqs[order(-eu_od_ma_freqs$Year),]
head(eu_od_ma_freqs)
eu_od_ma_freqs <- merge(eu_od_ma_freqs, eu_new_od_ma_freqs, by = "Year")
eu_od_ma_freqs
Graph of EU Market Authorized orphan drugs
eu_od_ma_freqs <- eu_od_ma_freqs <- eu_od_ma_freqs[eu_od_ma_freqs$Year > 2003,]
ggplot(eu_od_ma_freqs, aes(x = Year)) +
geom_line(aes(y = `Freq Orphan Drug MAs`, color="All Orphan Drug MAs")) +
geom_line(aes(y = `Freq New Orphan Drug MAs`, color="MAs for New Orphan Drugs")) +
labs(x = "Year",
y = "Number of Market Authorizations in the EU",
title="Annual Number of Orphan Drug Market Authorizations in the EU"
) +
scale_color_manual(name = " ", values = c("lightskyblue3", "darkred"))+
theme_minimal()

Merge orphan drug and R&D spending tables for EU
colnames(efpia_rd_totals) <- c("Year", "total_rd_euros", "total_rd_mil_dollars" , "total_rd_bil_dollars")
eu_rd_mas <- merge(efpia_rd_totals,eu_od_ma_freqs,by="Year")
eu_rd_mas <- clean_names(eu_rd_mas)
eu_rd_mas
PhRMA/ United States
US orphan drug Market Authorizations
https://www.accessdata.fda.gov/scripts/opdlisting/oopd/
Only approved products, accessed in 2023
us_orphan_drugs <- read.csv("Original Data/Area Level Orphan Authorizations/US_FDA_OD.csv")
us_orphan_drugs <- clean_names(us_orphan_drugs)
us_orphan_drugs <- us_orphan_drugs[!is.na(us_orphan_drugs$marketing_approval_date), ]
us_orphan_drugs
tail(us_orphan_drugs, n=5)
us_orphan_drugs <- head(us_orphan_drugs[order(us_orphan_drugs$marketing_approval_date),],-2) #notes removed at bottom of df
us_orphan_drugs$marketing_approval_date <- as.Date(us_orphan_drugs$marketing_approval_date, format="%m/%d/%y")
us_orphan_drugs <- us_orphan_drugs[order(us_orphan_drugs$marketing_approval_date),]
us_orphan_drugs$market_authorization_year <- year(us_orphan_drugs$marketing_approval_date)
us_orphan_drugs <- subset(us_orphan_drugs, market_authorization_year < 2022)
us_orphan_drugs
Check for duplicated rows. Remove one row because duplicated
us_orphan_drugs[duplicated(us_orphan_drugs),] # see if rows are duplicated
us_orphan_drugs[us_orphan_drugs$generic_name == "tacrolimus" ,] # view the duplication
us_orphan_drugs[710, ] # make sure we are removing correct row
us_orphan_drugs <- us_orphan_drugs[-710, ] # remove duplicated row
us_orphan_drugs[duplicated(us_orphan_drugs),] # ensure we removed duplicated row
US Number of MAs per company - note companies go by multiple names
ex. Novartis Pharmaceuticals Corporation/ Novartis Pharmaceuticals
Corp.
us_company_freq <- us_orphan_drugs %>%
count(sponsor_company) %>%
arrange(desc(n))
colnames(us_company_freq) <- c("Sponsor Company", "Market Authorizations")
us_company_freq
us_orphan_drugs
Where Trade Name is Null, replace with generic name
us_orphan_drugs[us_orphan_drugs$trade_name == "" ,]
us_orphan_drugs[us_orphan_drugs$trade_name == "" ,]$trade_name = us_orphan_drugs[us_orphan_drugs$trade_name == "" ,]$generic_name
us_orphan_drugs$trade_name = tolower(us_orphan_drugs$trade_name)
US Number of market authorizations per drug
us_ma_freq <- us_orphan_drugs %>%
count(trade_name) %>%
arrange(desc(n))
colnames(us_ma_freq) <- c("Trade Name", "Market Authorizations")
us_ma_freq$`Trade Name` <- str_to_title(us_ma_freq$`Trade Name`)
us_ma_freq
How many ODs with more than 1 authorization in US
nrow(us_ma_freq[us_ma_freq$`Market Authorizations` > 1, ])
## [1] 181
How many ODs with more than 5 authorizations in US
nrow(us_ma_freq[us_ma_freq$`Market Authorizations` > 5, ])
## [1] 12
us_ma_freq[us_ma_freq$`Market Authorizations` > 5, ]
Export to table
stargazer(us_ma_freq[1:12,], title= "US Market Authorization Counts for Orphan Drugs with Over 5 Market Authorizations", rownames = FALSE, align=TRUE, summary=FALSE, type = "html", out="Final Figures Formatted/US_OD_Multiple_MAs.htm")
##
## <table style="text-align:center"><caption><strong>US Market Authorization Counts for Orphan Drugs with Over 5 Market Authorizations</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>Trade Name</td><td>Market Authorizations</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>Keytruda</td><td>13</td></tr>
## <tr><td>Avastin</td><td>11</td></tr>
## <tr><td>Lynparza</td><td>11</td></tr>
## <tr><td>Imbruvica</td><td>10</td></tr>
## <tr><td>Kalydeco</td><td>10</td></tr>
## <tr><td>Opdivo</td><td>10</td></tr>
## <tr><td>Gleevec</td><td>9</td></tr>
## <tr><td>Revlimid</td><td>9</td></tr>
## <tr><td>Adcetris</td><td>7</td></tr>
## <tr><td>Darzalex</td><td>7</td></tr>
## <tr><td>Humira</td><td>7</td></tr>
## <tr><td>Novoseven</td><td>6</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr></table>
Look at some instances where there are multiple market authorizations
for the same drug
us_orphan_drugs[us_orphan_drugs$trade_name == "keytruda",]
us_orphan_drugs[us_orphan_drugs$trade_name == "avastin",]
us_orphan_drugs[us_orphan_drugs$trade_name == "lynparza",]
Select only new drugs in the US
us_orphan_drugs_new <- us_orphan_drugs %>%
group_by(trade_name) %>%
slice_min(order_by = marketing_approval_date, n = 1, with_ties = FALSE) %>% # Select the entry with the earliest market authorization year
ungroup()
us_orphan_drugs_new
us_orphan_drugs_new$Marketing.year <- year(as.Date(us_orphan_drugs_new$marketing_approval_date))
us_new_od_ma_freqs <- as.data.frame(table(us_orphan_drugs_new$Marketing.year))
colnames(us_new_od_ma_freqs) <- c("Year", "Freq New Orphan Drug MAs")
us_new_od_ma_freqs
Select all MAs in the US. Format US MA date as year
us_orphan_drugs$Marketing.year <- year(as.Date(us_orphan_drugs$marketing_approval_date))
us_od_ma_freqs <- as.data.frame(table(us_orphan_drugs$Marketing.year))
colnames(us_od_ma_freqs) <- c("Year", "Freq Orphan Drug MAs")
us_od_ma_freqs$Year <- as.character(us_od_ma_freqs$Year)
us_od_ma_freqs$Year <- as.numeric(us_od_ma_freqs$Year)
us_od_ma_freqs <- us_od_ma_freqs[order(-us_od_ma_freqs$Year),]
us_od_ma_freqs <- merge(us_od_ma_freqs, us_new_od_ma_freqs, by = "Year")
us_od_ma_freqs
Graph of US Market Authorizations orphan drugs
us_od_ma_freqs <- us_od_ma_freqs[us_od_ma_freqs$Year > 2003,]
us_od_ma_freqs <- us_od_ma_freqs[us_od_ma_freqs$Year < 2022,]
ggplot(us_od_ma_freqs, aes(x = Year)) +
geom_line(aes(y = `Freq Orphan Drug MAs`, color="All Orphan Drug MAs")) +
geom_line(aes(y = `Freq New Orphan Drug MAs`, color="MAs for New Orphan Drugs")) +
labs(x = "Year",
y = str_wrap("Number of Market Authorizations in the US", width=60),
title=str_wrap("Annual Number of Orphan Drug Market Authorizations in the US", width=60),
) +
scale_color_manual(name = " ", values = c("lightskyblue3", "darkred"))+
theme_minimal()

Merge US orphan drug df with PhRMA r&d df
#phrma_us_rd <- phrma_us_rd[-dim(phrma_us_rd)[1],] # last row is NA
us_rd_mas <- merge(phrma_us_rd,us_od_ma_freqs,by="Year")
us_rd_mas
Format
us_rd_mas$Domestic.R.D. <- parse_number(us_rd_mas$Domestic.R.D.) #dollars are in millions
us_rd_mas <- clean_names(us_rd_mas)
us_rd_mas
US dataframe simplified
us_rd_mas$annual_domestic_RD_spending_bil_dollars <- us_rd_mas$domestic_r_d /1000 #convert from millions of dollars to billions
us_rd_mas_simplified <- as.data.frame(cbind(us_rd_mas$year, us_rd_mas$annual_domestic_RD_spending_bil_dollars, us_rd_mas$freq_orphan_drug_m_as, us_rd_mas$freq_new_orphan_drug_m_as))
colnames(us_rd_mas_simplified) <- c("year", "annual_domestic_RD_spending_bil_dollars", "freq_orphan_drug_mas", "freq_new_orphan_drug_mas")
us_rd_mas_simplified$area <- rep("US", nrow(us_rd_mas_simplified)) # write US indicator in area column
us_rd_mas_simplified$lagged_annual_domestic_RD_spending_bil_dollars <- lag(us_rd_mas_simplified$annual_domestic_RD_spending_bil_dollars) # for lagged R&D spending - not used
us_rd_mas_simplified
EU dataframe simplified
eu_rd_mas_simplified <- as.data.frame(cbind(eu_rd_mas$year, eu_rd_mas$total_rd_bil_dollars, eu_rd_mas$freq_orphan_drug_m_as, eu_rd_mas$freq_new_orphan_drug_m_as))
colnames(eu_rd_mas_simplified) <- c("year", "annual_domestic_RD_spending_bil_dollars", "freq_orphan_drug_mas", "freq_new_orphan_drug_mas")
eu_rd_mas_simplified$area <- rep("EU", nrow(eu_rd_mas_simplified)) # write EU indicator in area column
eu_rd_mas_simplified$lagged_annual_domestic_RD_spending_bil_dollars <- lag(eu_rd_mas_simplified$annual_domestic_RD_spending_bil_dollars) # for lagged R&D spending - not used
eu_rd_mas_simplified
EU GDP Per Capita In Billions of Dollars - data is in current US
dollars. Data was pulled in 2023 however, only growth rates are used in
our analysis
https://www.macrotrends.net/global-metrics/countries/EUU/european-union/gdp-per-capita
eu_gdp_percapita_annual <- read.csv("Original Data/GDP/european-union-gdp-per-capita-annual.csv", skip=16)
eu_gdp_percapita_annual$year <- year(as.Date(eu_gdp_percapita_annual$date))
eu_gdp_percapita_annual <- clean_names(eu_gdp_percapita_annual)
colnames(eu_gdp_percapita_annual)[2] <- "gdp_per_capita_usd"
colnames(eu_gdp_percapita_annual)[3] <- "gdp_per_capita_annual_growthrate_usd"
eu_gdp_percapita_annual
EU GDP (Billion USD) - not used in analysis
https://www.macrotrends.net/global-metrics/countries/EUU/european-union/gdp-gross-domestic-product
EU GDP per capita is measured in billions of dollars and the growth rate
is taken from those measurements
eu_gdp_annual <- read.csv("Original Data/GDP/european-union-gdp-gross-domestic-product.csv", skip=8)
eu_gdp_annual <- eu_gdp_annual[c(24:64),c(1:4)]
eu_gdp_annual$year <- year(as.Date(eu_gdp_annual$Date, format="%m/%d/%y"))
eu_gdp_annual
eu_gdp_annual <- clean_names(eu_gdp_annual)
colnames(eu_gdp_annual)[2] <- "gdp_billions_of_usd"
eu_gdp_annual
Merge GDP Per Capita, GDP Growth rate into same df
eu_rd_mas_simplified <- merge(eu_rd_mas_simplified, eu_gdp_percapita_annual[,c("year", "gdp_per_capita_annual_growthrate_usd")], by = "year", all.x = TRUE)
eu_rd_mas_simplified <- merge(eu_rd_mas_simplified, eu_gdp_annual[,c("year", "gdp_billions_of_usd")], by = "year", all.x = TRUE)
eu_rd_mas_simplified
US GDP Per Capita In Dollars - data is in current US dollars. Data
was pulled in 2023 however, only growth rates are used in our
analysis
https://www.macrotrends.net/global-metrics/countries/USA/united-states/gdp-per-capita
us_gdp_percapita_annual <- read.csv("Original Data/GDP/united-states-gdp-per-capita-annual.csv", skip=16)
us_gdp_percapita_annual$year <- year(as.Date(us_gdp_percapita_annual$date))
us_gdp_percapita_annual <- clean_names(us_gdp_percapita_annual)
colnames(us_gdp_percapita_annual)[2] <- "gdp_per_capita_usd"
colnames(us_gdp_percapita_annual)[3] <- "gdp_per_capita_annual_growthrate_usd"
us_gdp_percapita_annual
US GDP (Billion USD) - not used in analysis
https://www.macrotrends.net/global-metrics/countries/usa/united-states/gdp-gross-domestic-product#:~:text=U.S.%20gdp%20for%202023%20was,a%200.92%25%20decline%20from%202019
us_gdp_annual <- read.csv("Original Data/GDP/united-states-gdp-gross-domestic-product.csv", skip=8)
us_gdp_annual <- us_gdp_annual[c(24:64),c(1:4)]
us_gdp_annual$year <- year(as.Date(us_gdp_annual$Date, format="%m/%d/%y"))
us_gdp_annual <- clean_names(us_gdp_annual)
colnames(us_gdp_annual)[2] <- "gdp_billions_of_usd"
us_gdp_annual
Merge GDP Per Capita, GDP Growth rate into same df
us_rd_mas_simplified <- merge(us_rd_mas_simplified, us_gdp_percapita_annual[,c("year", "gdp_per_capita_annual_growthrate_usd")], by = "year", all.x = TRUE)
us_rd_mas_simplified <- merge(us_rd_mas_simplified, us_gdp_annual[,c("year", "gdp_billions_of_usd")], by = "year", all.x = TRUE)
us_rd_mas_simplified
Limit dataframe to 2004 and later, make lagged R&D spending NA at
year 2004 (although lagged spending not used in analysis)
combined_rd_ma_df <- rbind(eu_rd_mas_simplified, us_rd_mas_simplified)
combined_rd_ma_df <- combined_rd_ma_df %>%
filter(year > 2003)
combined_rd_ma_df[combined_rd_ma_df$area =="US" & combined_rd_ma_df$year==2004,]$lagged_annual_domestic_RD_spending_bil_dollars <- NA
combined_rd_ma_df
Data Visualizations
ggplot(combined_rd_ma_df, aes(x = annual_domestic_RD_spending_bil_dollars, y = freq_orphan_drug_mas, color = area)) +
geom_point() +
labs(x = "Annual Domestic RD Spending (Billions USD)",
y = "Number of Orphan Drug Market Authorizations",
color = "Region",
title=str_wrap("Orphan Drug Market Authorizations by Annual Domestic R&D Spending in the EU and US", 60)) +
scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
theme_minimal()

ggplot(combined_rd_ma_df, aes(x = year , y = freq_orphan_drug_mas, color = area)) +
geom_line() +
labs(x = "Year",
y = "Number of Orphan Drug Market Authorizations", width=30,
color = "Region",
title=str_wrap("Annual Number of Orphan Drug Market Authorizations in the EU and US", width=70)) +
theme_minimal() +
scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100'))

ggplot(combined_rd_ma_df, aes(x = year , y = freq_new_orphan_drug_mas, color = area)) +
geom_line() +
labs(x = "Year",
y = str_wrap("Number of Market Authorizations for New Orphan Drugs", width=40),
color = "Region",
title=str_wrap("Annual Number of Market Authorizations for New Orphan Drugs in the EU and US", width=60)) +
theme_minimal() +
scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100'))

ggplot(combined_rd_ma_df, aes(x = year, y = annual_domestic_RD_spending_bil_dollars, color = area)) +
geom_line() +
labs(x = "Year",
y = "Domestic R&D Spending (Billions USD)",
color = "Region",
title="Annual Domestic R&D Spending in the EU and US") +
scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
theme_minimal()

ggplot(combined_rd_ma_df, aes(x = year, y = gdp_billions_of_usd, color = area)) +
geom_line() +
labs(x = "Year",
y = "GDP (Billions USD)",
color = "Region",
title="GDP in the EU and US") +
scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
theme_minimal()

ggplot(combined_rd_ma_df, aes(x = year, y = gdp_per_capita_annual_growthrate_usd, color = area)) +
geom_line() +
labs(x = "Year",
y = "GDP Per Capita on Last Day of Year (USD)",
color = "Region",
title="GDP Per Capita on Last Day of Year in the EU and US") +
scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
theme_minimal()

combined_rd_ma_df
ggplot(combined_rd_ma_df, aes(x = year, y = annual_domestic_RD_spending_bil_dollars/gdp_billions_of_usd, color = area)) +
geom_line() +
labs(x = "Year",
y = "Domestic R&D Spending / GDP",
color = "Region",
title=str_wrap("Annual Domestic Pharmaceutical R&D Spending as a Fraction of GDP in the EU and US", width=50)) +
scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
theme_minimal()

ggplot(combined_rd_ma_df, aes(x = annual_domestic_RD_spending_bil_dollars, y = freq_orphan_drug_mas, color = area)) +
geom_point() +
labs(x = "Domestic R&D Spending (Billions USD)",
y = "Number of Orphan Drug Market Authorizations",
color = "Region",
title=str_wrap("Annual Number of Orphan Drug Market Authorizations by Domestic R&D Spending in the EU and US", width=50)) +
scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
theme_minimal()

ggplot(combined_rd_ma_df, aes(x = annual_domestic_RD_spending_bil_dollars, y = freq_new_orphan_drug_mas, color = area)) +
geom_point() +
labs(x = "Domestic R&D Spending",
y = "Number of MAs for New Orphan Drugs",
color = "Region",
title="MAs for new Orphan Drugs by Domestic R&D Spending in the EU and US") +
scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
theme_minimal()

Orphan Drug Frequencies by Year by Area
hist(subset(combined_rd_ma_df, area=="US")$freq_orphan_drug_mas, main="US Frequencies of Annual Orphan Drug Market Authorizations", xlab="Number of Annual US Orphan Drug Market Authorizations")

hist(subset(combined_rd_ma_df, area=="EU")$freq_orphan_drug_mas, main="EU Frequencies of Annual Orphan Drug Market Authorizations",xlab="Number of Annual EU Orphan Drug Market Authorizations")

New Orphan Drugs Frequencies by Year by Area
hist(subset(combined_rd_ma_df, area=="US")$freq_new_orphan_drug_mas, main="US Frequencies of Annual New Orphan Drug Market Authorizations", xlab="Number of Annual US Orphan Drug Market Authorizations")

hist(subset(combined_rd_ma_df, area=="EU")$freq_new_orphan_drug_mas, main="EU Frequencies of Annual New Orphan Drug Market Authorizations",xlab="Number of Annual EU Orphan Drug Market Authorizations")

Making a years after 2004 variable
combined_rd_ma_df$yearsafter2004 <- combined_rd_ma_df$year - 2004
combined_rd_ma_df
VIFs & Correlations of preditors
Models to find VIFs
mod.rd.vif.check.rd <- lm(annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
vif(mod.rd.vif.check.rd)
## as.factor(area) yearsafter2004
## 1.000047 1.023525
## gdp_per_capita_annual_growthrate_usd
## 1.023572
mod.rd.vif.check.od <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd + annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df)
vif(mod.rd.vif.check.od)
## as.factor(area) yearsafter2004
## 1.448334 3.550554
## gdp_per_capita_annual_growthrate_usd annual_domestic_RD_spending_bil_dollars
## 1.089107 3.919797
Correlation Matrix
cor(combined_rd_ma_df[, c("annual_domestic_RD_spending_bil_dollars",
"yearsafter2004",
"gdp_per_capita_annual_growthrate_usd"
)])
## annual_domestic_RD_spending_bil_dollars
## annual_domestic_RD_spending_bil_dollars 1.000000000
## yearsafter2004 0.783319632
## gdp_per_capita_annual_growthrate_usd 0.009881313
## yearsafter2004
## annual_domestic_RD_spending_bil_dollars 0.7833196
## yearsafter2004 1.0000000
## gdp_per_capita_annual_growthrate_usd -0.1516009
## gdp_per_capita_annual_growthrate_usd
## annual_domestic_RD_spending_bil_dollars 0.009881313
## yearsafter2004 -0.151600941
## gdp_per_capita_annual_growthrate_usd 1.000000000
Summmaries
R&D spending growth rates by year
rdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "EU", ]$annual_domestic_RD_spending_bil_dollars
laggedrdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "EU", ]$lagged_annual_domestic_RD_spending_bil_dollars
EU_annual_domestic_RD_spending_bil_dollars_growth_rate <- rdspending/laggedrdspending
EU_annual_domestic_RD_spending_bil_dollars_growth_rate
## [1] NA 1.0296598 1.1576633 1.1422398 1.0950631 0.9774597 0.9692483
## [8] 1.0872053 0.9639885 1.0449788 1.0146180 0.9067316 1.0116816 1.0590698
## [15] 1.0736374 0.9868447 1.0691355 1.1101822
rdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "US", ]$annual_domestic_RD_spending_bil_dollars
laggedrdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "US", ]$lagged_annual_domestic_RD_spending_bil_dollars
US_annual_domestic_RD_spending_bil_dollars_growth_rate <- rdspending/laggedrdspending
US_annual_domestic_RD_spending_bil_dollars_growth_rate
## [1] NA 1.0478253 1.0968355 1.0777352 0.9716650 0.9939530 1.1508117
## [8] 0.8939616 1.0312479 1.0769337 1.0084489 1.1809938 1.0895376 1.0636573
## [15] 1.1159483 1.0343509 1.1251628 1.0994019
rdspending_growth_rates <- data.frame("Year"=seq(2004,2021), "US"=round(US_annual_domestic_RD_spending_bil_dollars_growth_rate,2), "EU"=round(EU_annual_domestic_RD_spending_bil_dollars_growth_rate,2))
rdspending_growth_rates$US <- as.character(rdspending_growth_rates$US )
rdspending_growth_rates$EU <- as.character(rdspending_growth_rates$EU )
# make 2011 NA because 2011 values were imputed
rdspending_growth_rates[rdspending_growth_rates$Year == 2011,]$EU <- "NA"
rdspending_growth_rates[rdspending_growth_rates$Year == 2012,]$EU <- "NA"
rdspending_growth_rates
stargazer(rdspending_growth_rates, out="Final Figures Formatted/RDSpendingAnnualGrowthRates.htm" , summary=FALSE, rownames=FALSE, colnames = TRUE, digits=2, digit.separator="",column.sep.width= "15pt", title="Annual Growth Rate of Phramaceutical R&D Spending")
##
## % Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
## % Date and time: Wed, Jan 08, 2025 - 03:12:20
## \begin{table}[!htbp] \centering
## \caption{Annual Growth Rate of Phramaceutical R&D Spending}
## \label{}
## \begin{tabular}{@{\extracolsep{15pt}} ccc}
## \\[-1.8ex]\hline
## \hline \\[-1.8ex]
## Year & US & EU \\
## \hline \\[-1.8ex]
## $2004$ & & \\
## $2005$ & 1.05 & 1.03 \\
## $2006$ & 1.1 & 1.16 \\
## $2007$ & 1.08 & 1.14 \\
## $2008$ & 0.97 & 1.1 \\
## $2009$ & 0.99 & 0.98 \\
## $2010$ & 1.15 & 0.97 \\
## $2011$ & 0.89 & NA \\
## $2012$ & 1.03 & NA \\
## $2013$ & 1.08 & 1.04 \\
## $2014$ & 1.01 & 1.01 \\
## $2015$ & 1.18 & 0.91 \\
## $2016$ & 1.09 & 1.01 \\
## $2017$ & 1.06 & 1.06 \\
## $2018$ & 1.12 & 1.07 \\
## $2019$ & 1.03 & 0.99 \\
## $2020$ & 1.13 & 1.07 \\
## $2021$ & 1.1 & 1.11 \\
## \hline \\[-1.8ex]
## \end{tabular}
## \end{table}
Linear Models of Annual Domestic R&D Spending
Function to Plot Linear Models of Annual Domestic R&D
Spending
plot_linear_rd <- function(linear_model, model_number){
#Plot residuals and acf/ pacf of residuals by panel (EU/US)
combined_rd_ma_df$residuals <- resid(linear_model)
par(mfrow=c(2,3))
us_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "US",]
eu_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "EU",]
plot(us_resid_data$year, us_resid_data$residuals, xlab="Year", main="US Residuals", ylab="Residuals")
acf(us_resid_data$residuals, main="ACF of US Residuals")
pacf(us_resid_data$residuals, main="PACF of US Residuals")
#qqnorm(us_resid_data$residuals, main = "QQ Plot of US Residuals")
plot(eu_resid_data$year, eu_resid_data$residuals, xlab="Year", main="EU Residuals", ylab="Residuals")
acf(eu_resid_data$residuals, main="ACF of EU Residuals")
pacf(eu_resid_data$residuals, main="PACF of EU Residuals")
#qqnorm(eu_resid_data$residuals, main = "QQ Plot of EU Residuals")
mtext(glue('Linear Model {model_number} of Domestic R&D Spending - Residual Plots'), side = 3, line = -1.1, outer = TRUE)
predicted <- predict(linear_model , type = "response")
ggplot(combined_rd_ma_df, aes(x = year, y = annual_domestic_RD_spending_bil_dollars, color = area, alpha="Observed")) +
geom_point() +
labs(x = "Year",
y = "Domestic R&D Spending (Billions USD)",
alpha = "",
color = "Region",
title=glue('Linear Model {model_number} of Annual Domestic R&D Spending'))+
geom_line(aes(x=year, y=predicted, alpha = "Fitted"))+
scale_alpha_manual(values = c(0.5, 1), guide = guide_legend(order = 1))+
scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
theme_minimal()
# https://aosmith.rbind.io/2020/07/09/ggplot2-override-aes/
#Controlling legend appearance in ggplot2 with override.aes
#July 9, 2020 · @aosmith16
#Ariel Muldoon
# https://www.tidyverse.org/blog/2024/02/ggplot2-3-5-0-legends/
# Teun van den Brand
# 2024/02/26
}
Function to Get Model Metrics of Linear Models of Annual Domestic
R&D Spending
get_rd_model_metrics <- function(model, model_type){
#Get RMSE
predicted <- predict(model, type = "response")
actual <- combined_rd_ma_df$annual_domestic_RD_spending_bil_dollars
model_rmse <- Metrics::rmse(actual=actual, predicted=predicted)
#Get F stat & pvalue
model_fstat <- summary(model)$fstatistic[1]
model_df1 <- summary(model)$fstatistic[2]
model_df2 <- summary(model)$fstatistic[3]
model_pval <- pf(model_fstat, model_df1, model_df2, lower.tail = FALSE)
# https://www.reneshbedre.com/blog/get-f-stat-p-value-from-lm.html
# Renesh Bedre
# March 26, 2023
# Get Adj Rsq and AIC
model_adjRsq <- summary(model)$adj.r.squared
model_aic <- AIC(model)
#List metrics in a df
metrics_df <- data.frame("rmse" = model_rmse, "aic"= model_aic, "df1" = model_df1, "df2" = model_df2, "fstat" = model_fstat, "pval" = model_pval, "adjrsq" = model_adjRsq)
rownames(metrics_df) <- NULL
return(metrics_df)
}
Linear Model 1 of Annual Domestic R&D Spending
mod.rd.1 <- lm(annual_domestic_RD_spending_bil_dollars ~ as.factor(area) , data=combined_rd_ma_df)
summary(mod.rd.1)
##
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area),
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.701 -8.972 -0.418 4.094 33.354
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.331 2.667 14.374 5.21e-16 ***
## as.factor(area)US 7.926 3.771 2.102 0.0431 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.31 on 34 degrees of freedom
## Multiple R-squared: 0.115, Adjusted R-squared: 0.08893
## F-statistic: 4.416 on 1 and 34 DF, p-value: 0.04308
plot_linear_rd(mod.rd.1, "1")


#Estimate covariance matrix with Newey West
mod.rd.1.vcov <- vcovPL(mod.rd.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.1 <- coeftest(mod.rd.1, vcov = mod.rd.1.vcov)
mod.rd.nw.1
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.3309 1.9857 19.3037 < 2e-16 ***
## as.factor(area)US 7.9255 4.0287 1.9673 0.05736 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.1)
## 2.5 % 97.5 %
## (Intercept) 34.2955306 42.36628
## as.factor(area)US -0.2617976 16.11283
#Get Metrics
mod.rd.1.metrics <- get_rd_model_metrics(mod.rd.1, "Linear")
mod.rd.1.metrics
Linear Model 2 of Annual Domestic R&D Spending
mod.rd.2 <- lm(annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + yearsafter2004, data=combined_rd_ma_df)
summary(mod.rd.2)
##
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) +
## yearsafter2004, data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.1660 -5.2646 -0.4164 3.3709 18.3543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.3317 2.2942 10.170 1.06e-11 ***
## as.factor(area)US 7.9255 2.1200 3.738 0.000702 ***
## yearsafter2004 1.7646 0.2043 8.637 5.55e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.36 on 33 degrees of freedom
## Multiple R-squared: 0.7286, Adjusted R-squared: 0.7121
## F-statistic: 44.28 on 2 and 33 DF, p-value: 4.528e-10
plot_linear_rd(mod.rd.2, "2")


#Estimate covariance matrix with Newey West
mod.rd.2.vcov <- vcovPL(mod.rd.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.2 <- coeftest(mod.rd.2, vcov = mod.rd.2.vcov)
mod.rd.nw.2
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.33166 3.17686 7.3442 1.969e-08 ***
## as.factor(area)US 7.92552 4.08929 1.9381 0.0612 .
## yearsafter2004 1.76462 0.20683 8.5316 7.369e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.2)
## 2.5 % 97.5 %
## (Intercept) 16.868279 29.795037
## as.factor(area)US -0.394208 16.245244
## yearsafter2004 1.343810 2.185424
#Get Metrics
mod.rd.2.metrics <- get_rd_model_metrics(mod.rd.2, "Linear")
mod.rd.2.metrics
Linear Model 3 of Annual Domestic R&D Spending
mod.rd.3 <- lm(annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.rd.3)
##
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) +
## yearsafter2004 + gdp_per_capita_annual_growthrate_usd, data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.2708 -5.4330 -0.3892 3.8556 16.1289
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.1364 2.4080 9.193 1.70e-10 ***
## as.factor(area)US 7.9049 2.0872 3.787 0.000634 ***
## yearsafter2004 1.8088 0.2035 8.889 3.73e-10 ***
## gdp_per_capita_annual_growthrate_usd 0.2555 0.1785 1.431 0.162018
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.261 on 32 degrees of freedom
## Multiple R-squared: 0.7449, Adjusted R-squared: 0.721
## F-statistic: 31.14 on 3 and 32 DF, p-value: 1.296e-09
plot_linear_rd(mod.rd.3, "3")


#Estimate covariance matrix with Newey West
mod.rd.3.vcov <- vcovPL(mod.rd.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.3 <- coeftest(mod.rd.3, vcov = mod.rd.3.vcov)
mod.rd.nw.3
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.13638 3.26543 6.7790 1.162e-07 ***
## as.factor(area)US 7.90493 4.12804 1.9149 0.06448 .
## yearsafter2004 1.80878 0.19030 9.5051 7.733e-11 ***
## gdp_per_capita_annual_growthrate_usd 0.25552 0.11530 2.2160 0.03392 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.3, level=0.9)
## 5 % 95 %
## (Intercept) 16.60510859 27.6676593
## as.factor(area)US 0.91249853 14.8973711
## yearsafter2004 1.48643572 2.1311166
## gdp_per_capita_annual_growthrate_usd 0.06020474 0.4508275
#Get Metrics
mod.rd.3.metrics <- get_rd_model_metrics(mod.rd.3, "Linear")
mod.rd.3.metrics
Linear Model 4 of Annual Domestic R&D Spending
mod.rd.4 <- lm(annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + yearsafter2004 + as.factor(area):yearsafter2004 , data=combined_rd_ma_df)
summary(mod.rd.4)
##
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) +
## yearsafter2004 + as.factor(area):yearsafter2004, data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.3946 -3.0224 0.8889 3.1603 11.3927
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.2932 2.0930 14.474 1.35e-15 ***
## as.factor(area)US -5.9977 2.9599 -2.026 0.0511 .
## yearsafter2004 0.9456 0.2102 4.499 8.46e-05 ***
## as.factor(area)US:yearsafter2004 1.6380 0.2972 5.511 4.49e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.626 on 32 degrees of freedom
## Multiple R-squared: 0.8607, Adjusted R-squared: 0.8477
## F-statistic: 65.92 on 3 and 32 DF, p-value: 8.626e-14
plot_linear_rd(mod.rd.4, "4")


#Estimate covariance matrix with Newey West
mod.rd.4.vcov <- vcovPL(mod.rd.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.4 <- coeftest(mod.rd.4, vcov = mod.rd.4.vcov)
mod.rd.nw.4
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.29324 2.15653 14.0472 3.103e-15 ***
## as.factor(area)US -5.99766 4.71995 -1.2707 0.2129950
## yearsafter2004 0.94561 0.19025 4.9702 2.167e-05 ***
## as.factor(area)US:yearsafter2004 1.63802 0.44299 3.6977 0.0008122 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.4)
## 2.5 % 97.5 %
## (Intercept) 25.9005414 34.685948
## as.factor(area)US -15.6118810 3.616569
## yearsafter2004 0.5580724 1.333142
## as.factor(area)US:yearsafter2004 0.7356826 2.540358
#Get Metrics
mod.rd.4.metrics <- get_rd_model_metrics(mod.rd.4, "Linear")
mod.rd.4.metrics
Linear Model 5 of Annual Domestic R&D Spending
mod.rd.5 <- lm(annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + yearsafter2004 + yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.rd.5)
##
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) +
## yearsafter2004 + yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.4132 -2.8428 0.9883 2.4389 10.3907
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.4170 2.2497 13.076 3.72e-14 ***
## as.factor(area)US -5.5691 2.9831 -1.867 0.0714 .
## yearsafter2004 0.9957 0.2152 4.627 6.23e-05 ***
## gdp_per_capita_annual_growthrate_usd 0.1403 0.1335 1.051 0.3013
## as.factor(area)US:yearsafter2004 1.5863 0.3008 5.273 9.81e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.619 on 31 degrees of freedom
## Multiple R-squared: 0.8655, Adjusted R-squared: 0.8482
## F-statistic: 49.88 on 4 and 31 DF, p-value: 4.498e-13
plot_linear_rd(mod.rd.5, "5")


#Estimate covariance matrix with Newey West
mod.rd.5.vcov <- vcovPL(mod.rd.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.5 <- coeftest(mod.rd.5, vcov = mod.rd.5.vcov)
mod.rd.nw.5
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.416980 2.611732 11.2634 1.737e-12 ***
## as.factor(area)US -5.569062 4.963283 -1.1221 0.270456
## yearsafter2004 0.995731 0.215517 4.6202 6.360e-05 ***
## gdp_per_capita_annual_growthrate_usd 0.140302 0.095988 1.4617 0.153897
## as.factor(area)US:yearsafter2004 1.586268 0.463832 3.4199 0.001775 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.5)
## 2.5 % 97.5 %
## (Intercept) 24.09031679 34.7436438
## as.factor(area)US -15.69174471 4.5536213
## yearsafter2004 0.55618135 1.4352798
## gdp_per_capita_annual_growthrate_usd -0.05546634 0.3360705
## as.factor(area)US:yearsafter2004 0.64027629 2.5322596
#Get Metrics
mod.rd.5.metrics <- get_rd_model_metrics(mod.rd.5, "Linear")
mod.rd.5.metrics
Linear Model 6 of Annual Domestic R&D Spending
mod.rd.6 <- lm(annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + yearsafter2004 + yearsafter2004:as.factor(area) + I(yearsafter2004^2) + I(yearsafter2004^2):as.factor(area) + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.rd.6)
##
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) +
## yearsafter2004 + yearsafter2004:as.factor(area) + I(yearsafter2004^2) +
## I(yearsafter2004^2):as.factor(area) + gdp_per_capita_annual_growthrate_usd,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7026 -1.8736 0.1949 1.5479 5.0044
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.37392 2.15944 12.676 2.36e-13 ***
## as.factor(area)US 5.52983 2.63506 2.099 0.04468 *
## yearsafter2004 1.81053 0.55772 3.246 0.00295 **
## I(yearsafter2004^2) -0.04844 0.03102 -1.562 0.12922
## gdp_per_capita_annual_growthrate_usd 0.11579 0.09217 1.256 0.21906
## as.factor(area)US:yearsafter2004 -2.59485 0.71851 -3.611 0.00114 **
## as.factor(area)US:I(yearsafter2004^2) 0.24648 0.04040 6.102 1.21e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.831 on 29 degrees of freedom
## Multiple R-squared: 0.9527, Adjusted R-squared: 0.943
## F-statistic: 97.43 on 6 and 29 DF, p-value: < 2.2e-16
plot_linear_rd(mod.rd.6, "6")


#Estimate covariance matrix with Newey West
mod.rd.6.vcov <- vcovPL(mod.rd.6, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.6 <- coeftest(mod.rd.6, vcov = mod.rd.6.vcov)
mod.rd.nw.6
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.373916 2.705190 10.1190 5.042e-11
## as.factor(area)US 5.529825 1.494892 3.6991 0.0008995
## yearsafter2004 1.810535 0.668122 2.7099 0.0111824
## I(yearsafter2004^2) -0.048445 0.036708 -1.3197 0.1972472
## gdp_per_capita_annual_growthrate_usd 0.115789 0.071899 1.6104 0.1181367
## as.factor(area)US:yearsafter2004 -2.594854 0.549954 -4.7183 5.547e-05
## as.factor(area)US:I(yearsafter2004^2) 0.246480 0.029974 8.2233 4.569e-09
##
## (Intercept) ***
## as.factor(area)US ***
## yearsafter2004 *
## I(yearsafter2004^2)
## gdp_per_capita_annual_growthrate_usd
## as.factor(area)US:yearsafter2004 ***
## as.factor(area)US:I(yearsafter2004^2) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.6)
## 2.5 % 97.5 %
## (Intercept) 21.84118268 32.90665020
## as.factor(area)US 2.47242790 8.58722280
## yearsafter2004 0.44407169 3.17699759
## I(yearsafter2004^2) -0.12352097 0.02663137
## gdp_per_capita_annual_growthrate_usd -0.03126173 0.26283889
## as.factor(area)US:yearsafter2004 -3.71963522 -1.47007262
## as.factor(area)US:I(yearsafter2004^2) 0.18517747 0.30778301
#Get Metrics
mod.rd.6.metrics <- get_rd_model_metrics(mod.rd.6, "Linear")
mod.rd.6.metrics
mod.rd.metrics <- rbind(mod.rd.1.metrics, mod.rd.2.metrics, mod.rd.3.metrics, mod.rd.4.metrics, mod.rd.5.metrics, mod.rd.6.metrics)
aic_values <- round(mod.rd.metrics$aic)
rmse_values <- round(mod.rd.metrics$rmse, 2)
fstat_values <- round(mod.rd.metrics$fstat, 2)
p_values <- round(mod.rd.metrics$pval, 4)
adjrsq_values <- round(mod.rd.metrics$adjrsq, 2)
stargazer(mod.rd.1, mod.rd.2, mod.rd.3, mod.rd.4, mod.rd.5, mod.rd.6,
title="Linear Models of Annual Domestic Pharmaceutical R&D Spending (Not Adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/rdmodelsNonAdjusted.htm",
covariate.labels = c("US", "Years After 2004", "Years After 2004^{2}","Annual GDP Growth Rate Per Capita", "US:Years After 2004", "US:Years After 2004^{2}", "Intercept"),
dep.var.labels=c("Annual Domestic Pharmaceutical R&D Spending (Billion USD)"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
##
## <table style="text-align:center"><caption><strong>Linear Models of Annual Domestic Pharmaceutical R&D Spending (Not Adjusted for Error Structure)</strong></caption>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="6"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="6" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="6">Annual Domestic Pharmaceutical R&D Spending (Billion USD)</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td><td>Model 6</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>7.926<sup>**</sup></td><td>7.926<sup>***</sup></td><td>7.905<sup>***</sup></td><td>-5.998<sup>*</sup></td><td>-5.569<sup>*</sup></td><td>5.530<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(3.771)</td><td>(2.120)</td><td>(2.087)</td><td>(2.960)</td><td>(2.983)</td><td>(2.635)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>1.765<sup>***</sup></td><td>1.809<sup>***</sup></td><td>0.946<sup>***</sup></td><td>0.996<sup>***</sup></td><td>1.811<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.204)</td><td>(0.203)</td><td>(0.210)</td><td>(0.215)</td><td>(0.558)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004<sup>2</sup></td><td></td><td></td><td></td><td></td><td></td><td>-0.048</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.031)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.256</td><td></td><td>0.140</td><td>0.116</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.179)</td><td></td><td>(0.133)</td><td>(0.092)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004</td><td></td><td></td><td></td><td>1.638<sup>***</sup></td><td>1.586<sup>***</sup></td><td>-2.595<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.297)</td><td>(0.301)</td><td>(0.719)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004<sup>2</sup></td><td></td><td></td><td></td><td></td><td></td><td>0.246<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.040)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>38.331<sup>***</sup></td><td>23.332<sup>***</sup></td><td>22.136<sup>***</sup></td><td>30.293<sup>***</sup></td><td>29.417<sup>***</sup></td><td>27.374<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(2.667)</td><td>(2.294)</td><td>(2.408)</td><td>(2.093)</td><td>(2.250)</td><td>(2.159)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>281</td><td>240</td><td>240</td><td>218</td><td>219</td><td>185</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>11</td><td>6.09</td><td>5.9</td><td>4.36</td><td>4.29</td><td>2.54</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.09</td><td>0.71</td><td>0.72</td><td>0.85</td><td>0.85</td><td>0.94</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>4.42</td><td>44.28</td><td>31.14</td><td>65.92</td><td>49.88</td><td>97.43</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0.0431</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="6" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.rd.nw.1
m2 <- mod.rd.nw.2
m3 <- mod.rd.nw.3
m4 <- mod.rd.nw.4
m5 <- mod.rd.nw.5
m6 <- mod.rd.nw.6
stargazer(m1, m2, m3, m4, m5, m6, title="Newey-West Adjusted Linear Models of Annual Domestic Pharmaceutical R&D Spending", align=TRUE, type = "html", out="Final Models Formatted/rdmodels.htm",
covariate.labels = c("US", "Years After 2004", "Years After 2004^{2}","Annual GDP Growth Rate Per Capita", "US:Years After 2004", "US:Years After 2004^{2}", "Intercept"),
dep.var.labels=c("Annual Domestic Pharmaceutical R&D Spending (Billion USD)"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"),
omit.stat = c("all"),
model.numbers=FALSE,
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
##
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Linear Models of Annual Domestic Pharmaceutical R&D Spending</strong></caption>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="6"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="6" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="6">Annual Domestic Pharmaceutical R&D Spending (Billion USD)</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td><td>Model 6</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>7.926<sup>*</sup></td><td>7.926<sup>*</sup></td><td>7.905<sup>*</sup></td><td>-5.998</td><td>-5.569</td><td>5.530<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(4.029)</td><td>(4.089)</td><td>(4.128)</td><td>(4.720)</td><td>(4.963)</td><td>(1.495)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>1.765<sup>***</sup></td><td>1.809<sup>***</sup></td><td>0.946<sup>***</sup></td><td>0.996<sup>***</sup></td><td>1.811<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.207)</td><td>(0.190)</td><td>(0.190)</td><td>(0.216)</td><td>(0.668)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004<sup>2</sup></td><td></td><td></td><td></td><td></td><td></td><td>-0.048</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.037)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.256<sup>**</sup></td><td></td><td>0.140</td><td>0.116</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.115)</td><td></td><td>(0.096)</td><td>(0.072)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004</td><td></td><td></td><td></td><td>1.638<sup>***</sup></td><td>1.586<sup>***</sup></td><td>-2.595<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.443)</td><td>(0.464)</td><td>(0.550)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004<sup>2</sup></td><td></td><td></td><td></td><td></td><td></td><td>0.246<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.030)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>38.331<sup>***</sup></td><td>23.332<sup>***</sup></td><td>22.136<sup>***</sup></td><td>30.293<sup>***</sup></td><td>29.417<sup>***</sup></td><td>27.374<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(1.986)</td><td>(3.177)</td><td>(3.265)</td><td>(2.157)</td><td>(2.612)</td><td>(2.705)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>281</td><td>240</td><td>240</td><td>218</td><td>219</td><td>185</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>11</td><td>6.09</td><td>5.9</td><td>4.36</td><td>4.29</td><td>2.54</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.09</td><td>0.71</td><td>0.72</td><td>0.85</td><td>0.85</td><td>0.94</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>4.42</td><td>44.28</td><td>31.14</td><td>65.92</td><td>49.88</td><td>97.43</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0.0431</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="6" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
Linear Models of Number of Orphan Drug Market Authorizations
Function to Plot Models of Number of Orphan Drug Market
Authorizations (Linear or Poisson Models)
plot_lin_pois_od <- function(model, model_number, model_type){
#Plot residuals and acf/ pacf of residuals by panel (EU/US)
predicted <- predict(model , type = "response")
actual <- combined_rd_ma_df$freq_orphan_drug_mas
combined_rd_ma_df$residuals <- actual-predicted#resid(model)
par(mfrow =c(2,3))
us_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "US",]
eu_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "EU",]
plot(us_resid_data$year, us_resid_data$residuals, xlab="Year", main="US Residuals", ylab="Residuals")
acf(us_resid_data$residuals, main="ACF of US Residuals")
pacf(us_resid_data$residuals, main="PACF of US Residuals")
#qqnorm(us_resid_data$residuals, main = "QQ Plot of US Residuals")
plot(eu_resid_data$year, eu_resid_data$residuals, xlab="Year", main="EU Residuals", ylab="Residuals")
acf(eu_resid_data$residuals, main="ACF of EU Residuals")
pacf(eu_resid_data$residuals, main="PACF of EU Residuals")
#qqnorm(eu_resid_data$residuals, main = "QQ Plot of EU Residuals")
mtext(glue('{model_type} Model {model_number} of Orphan Drug MAs - Residual Plots'), side = 3, line = -1.1, outer = TRUE)
ggplot(combined_rd_ma_df, aes(x = year, y = freq_orphan_drug_mas, color = area, alpha="Observed")) +
geom_point() +
labs(x = "Year",
y = str_wrap("Number of Orphan Drug Market Authorizations", width = 40),
color = "Region",
alpha = " ",
title=str_wrap(glue("{model_type} Model {model_number} of Annual Orphan Drug Market Authorizations"), width = 60))+
scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
geom_line(aes(x=year, y=predicted, alpha = "Fitted"))+
scale_alpha_manual(values = c(0.5, 1))+
theme_minimal()
# https://aosmith.rbind.io/2020/07/09/ggplot2-override-aes/
#Controlling legend appearance in ggplot2 with override.aes
#July 9, 2020 · @aosmith16
# https://www.tidyverse.org/blog/2024/02/ggplot2-3-5-0-legends/
#Teun van den Brand
#2024/02/26
}
Get Metrics of Models of Number of Orphan Drug Market Authorizations
(Linear or Poisson Models)
get_od_model_metrics <- function(model, model_type){
#Get RMSE
predicted <- predict(model, type = "response")
actual <- combined_rd_ma_df$freq_orphan_drug_mas
model_rmse <- Metrics::rmse(actual=actual, predicted=predicted)
model_df1 <- NA
model_df2 <- NA
model_adjRsq <- NA
model_fstat <- NA
model_pval <- NA
model_loglik <- NA
if(model_type == "Linear"){
model_df1 <- summary(model)$fstatistic["numdf"]
model_df2 <- summary(model)$fstatistic["dendf"]
model_fstat <- summary(model)$fstatistic["value"]
model_pval <- pf(model_fstat, model_df1, model_df2, lower.tail = FALSE)
# https://www.reneshbedre.com/blog/get-f-stat-p-value-from-lm.html
# Renesh Bedre
# March 26, 2023
model_adjRsq <- summary(model)$adj.r.squared
}
if(model_type == "Poisson"){
model_loglik <- logLik(model)
}
model_aic <- AIC(model)
metrics_df <- data.frame("rmse" = model_rmse, "aic"= model_aic, "df1" = model_df1, "df2" = model_df2, "fstat" = model_fstat, "pval" = model_pval, "adjrsq" = model_adjRsq, "LogLik" = model_loglik)
rownames(metrics_df) <- NULL
return(metrics_df)
}
Linear Model 1 of Num of Annual Orphan Drug MAs
mod.od.1 <- lm(freq_orphan_drug_mas ~ as.factor(area), data=combined_rd_ma_df)
summary(mod.od.1)
##
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area), data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.667 -11.667 -2.028 4.653 50.333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.389 5.055 2.253 0.0308 *
## as.factor(area)US 32.278 7.149 4.515 7.24e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.45 on 34 degrees of freedom
## Multiple R-squared: 0.3748, Adjusted R-squared: 0.3564
## F-statistic: 20.38 on 1 and 34 DF, p-value: 7.244e-05
plot_lin_pois_od(mod.od.1, "1", "Linear")


#Estimate covariance matrix with Newey West
mod.od.1.vcov <- vcovPL(mod.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.od.nw.1 <-coeftest(mod.od.1, vcov = mod.od.1.vcov)
mod.od.nw.1
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.3889 1.3974 8.1500 1.663e-09 ***
## as.factor(area)US 32.2778 9.9247 3.2523 0.002587 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.od.nw.1)
## 2.5 % 97.5 %
## (Intercept) 8.549014 14.22876
## as.factor(area)US 12.108452 52.44710
mod.od.1.metrics <- get_od_model_metrics(mod.od.1, "Linear")
mod.od.1.metrics
Linear Model 2 of Num of Annual Orphan Drug MAs
mod.od.2 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004, data=combined_rd_ma_df)
summary(mod.od.2)
##
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.551 -11.583 -2.681 11.086 34.966
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12.3611 5.6427 -2.191 0.0356 *
## as.factor(area)US 32.2778 5.2143 6.190 5.52e-07 ***
## yearsafter2004 2.7941 0.5025 5.560 3.53e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.64 on 33 degrees of freedom
## Multiple R-squared: 0.6772, Adjusted R-squared: 0.6576
## F-statistic: 34.62 on 2 and 33 DF, p-value: 7.892e-09
plot_lin_pois_od(mod.od.2, "2", "Linear")


#Estimate covariance matrix with Newey West
mod.od.2.vcov <- vcovPL(mod.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.od.nw.2 <-coeftest(mod.od.2, vcov = mod.od.2.vcov)
mod.od.nw.2
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12.36111 7.51879 -1.6440 0.1097
## as.factor(area)US 32.27778 10.07391 3.2041 0.0030 **
## yearsafter2004 2.79412 0.37637 7.4239 1.571e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.od.nw.2)
## 2.5 % 97.5 %
## (Intercept) -27.658208 2.935986
## as.factor(area)US 11.782261 52.773295
## yearsafter2004 2.028395 3.559841
mod.od.2.metrics <- get_od_model_metrics(mod.od.2, "Linear")
Linear Model 3 of Num of Annual Orphan Drug MAs
mod.od.3 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.od.3)
##
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## gdp_per_capita_annual_growthrate_usd, data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.932 -11.179 -3.044 12.434 33.530
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.0152 5.9576 -2.520 0.0169 *
## as.factor(area)US 32.2321 5.1638 6.242 5.40e-07 ***
## yearsafter2004 2.8922 0.5035 5.745 2.28e-06 ***
## gdp_per_capita_annual_growthrate_usd 0.5674 0.4417 1.285 0.2081
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.49 on 32 degrees of freedom
## Multiple R-squared: 0.693, Adjusted R-squared: 0.6643
## F-statistic: 24.08 on 3 and 32 DF, p-value: 2.419e-08
plot_lin_pois_od(mod.od.3, "3", "Linear")


#Estimate covariance matrix with Newey West
mod.od.3.vcov <- vcovPL(mod.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.od.nw.3 <-coeftest(mod.od.3, vcov = mod.od.3.vcov)
mod.od.nw.3
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.01520 6.14421 -2.4438 0.020228 *
## as.factor(area)US 32.23207 10.02166 3.2162 0.002967 **
## yearsafter2004 2.89217 0.28886 10.0125 2.199e-11 ***
## gdp_per_capita_annual_growthrate_usd 0.56737 0.27724 2.0465 0.049000 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.od.nw.3)
## 2.5 % 97.5 %
## (Intercept) -27.530557154 -2.499846
## as.factor(area)US 11.818618997 52.645527
## yearsafter2004 2.303792315 3.480552
## gdp_per_capita_annual_growthrate_usd 0.002641035 1.132100
mod.od.3.metrics <- get_od_model_metrics(mod.od.3, "Linear")
vif(mod.od.3)
## as.factor(area) yearsafter2004
## 1.000047 1.023525
## gdp_per_capita_annual_growthrate_usd
## 1.023572
Linear Model 4 of Num of Annual Orphan Drug MAs
mod.od.4 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + as.factor(area):yearsafter2004, data=combined_rd_ma_df)
summary(mod.od.4)
##
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## as.factor(area):yearsafter2004, data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.4159 -4.9394 -0.3846 5.8571 22.4417
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.9942 4.4174 1.583 0.123
## as.factor(area)US -6.4327 6.2472 -1.030 0.311
## yearsafter2004 0.5170 0.4436 1.166 0.252
## as.factor(area)US:yearsafter2004 4.5542 0.6273 7.260 3e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.764 on 32 degrees of freedom
## Multiple R-squared: 0.8781, Adjusted R-squared: 0.8666
## F-statistic: 76.8 on 3 and 32 DF, p-value: 1.04e-14
plot_lin_pois_od(mod.od.4, "4", "Linear")


#Estimate covariance matrix with Newey West
mod.od.4.vcov <- vcovPL(mod.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.od.nw.4 <-coeftest(mod.od.4, vcov = mod.od.4.vcov)
mod.od.nw.4
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.99415 1.95792 3.5722 0.001145 **
## as.factor(area)US -6.43275 7.43836 -0.8648 0.393582
## yearsafter2004 0.51703 0.16584 3.1176 0.003839 **
## as.factor(area)US:yearsafter2004 4.55418 0.70803 6.4322 3.125e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.od.nw.4)
## 2.5 % 97.5 %
## (Intercept) 3.0059898 10.9823143
## as.factor(area)US -21.5842014 8.7187043
## yearsafter2004 0.1792165 0.8548392
## as.factor(area)US:yearsafter2004 3.1119702 5.9963890
mod.od.4.metrics <- get_od_model_metrics(mod.od.4, "Linear")
predicted <- predict(mod.od.4, type = "response")
Linear Model 5 of Num of Annual Orphan Drug MAs
mod.od.5 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.od.5)
##
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.0780 -4.9553 -0.3644 5.8711 22.0730
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.4758 4.7757 1.147 0.260
## as.factor(area)US -5.6901 6.3325 -0.899 0.376
## yearsafter2004 0.6039 0.4568 1.322 0.196
## gdp_per_capita_annual_growthrate_usd 0.2431 0.2834 0.858 0.398
## as.factor(area)US:yearsafter2004 4.4645 0.6386 6.992 7.58e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.805 on 31 degrees of freedom
## Multiple R-squared: 0.8809, Adjusted R-squared: 0.8655
## F-statistic: 57.31 on 4 and 31 DF, p-value: 6.978e-14
plot_lin_pois_od(mod.od.5, "5", "Linear")


#Estimate covariance matrix with Newey West
mod.od.5.vcov <- vcovPL(mod.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.od.nw.5 <-coeftest(mod.od.5, vcov = mod.od.5.vcov)
mod.od.nw.5
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.47584 1.81680 3.0140 0.005104 **
## as.factor(area)US -5.69012 7.99710 -0.7115 0.482081
## yearsafter2004 0.60388 0.17722 3.4075 0.001834 **
## gdp_per_capita_annual_growthrate_usd 0.24310 0.14954 1.6257 0.114144
## as.factor(area)US:yearsafter2004 4.46451 0.76598 5.8285 2.001e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.od.nw.5)
## 2.5 % 97.5 %
## (Intercept) 1.77044391 9.1812268
## as.factor(area)US -22.00031835 10.6200844
## yearsafter2004 0.24243509 0.9653205
## gdp_per_capita_annual_growthrate_usd -0.06188618 0.5480933
## as.factor(area)US:yearsafter2004 2.90228697 6.0267275
mod.od.5.metrics <- get_od_model_metrics(mod.od.5, "Linear")
predicted <- predict(mod.od.5, type = "response")
mod.od.metrics <- rbind(mod.od.1.metrics, mod.od.2.metrics, mod.od.3.metrics, mod.od.4.metrics, mod.od.5.metrics)
aic_values <- round(mod.od.metrics$aic)
rmse_values <- round(mod.od.metrics$rmse, 2)
fstat_values <- round(mod.od.metrics$fstat, 2)
p_values <- round(mod.od.metrics$pval, 4)
adjrsq_values <- round(mod.od.metrics$adjrsq, 2)
stargazer(mod.od.1, mod.od.2, mod.od.3, mod.od.4, mod.od.5, title="Linear Models of Orphan Drug Market Authorizations (Not Adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/orphandrugMAmodelsNonAdjusted.htm",
covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
dep.var.labels=c("Annual Number of Orphan Drug Market Authorizations"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
##
## <table style="text-align:center"><caption><strong>Linear Models of Orphan Drug Market Authorizations (Not Adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Orphan Drug Market Authorizations</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>32.278<sup>***</sup></td><td>32.278<sup>***</sup></td><td>32.232<sup>***</sup></td><td>-6.433</td><td>-5.690</td></tr>
## <tr><td style="text-align:left"></td><td>(7.149)</td><td>(5.214)</td><td>(5.164)</td><td>(6.247)</td><td>(6.333)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>2.794<sup>***</sup></td><td>2.892<sup>***</sup></td><td>0.517</td><td>0.604</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.503)</td><td>(0.503)</td><td>(0.444)</td><td>(0.457)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.567</td><td></td><td>0.243</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.442)</td><td></td><td>(0.283)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>4.554<sup>***</sup></td><td>4.465<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.627)</td><td>(0.639)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>11.389<sup>**</sup></td><td>-12.361<sup>**</sup></td><td>-15.015<sup>**</sup></td><td>6.994</td><td>5.476</td></tr>
## <tr><td style="text-align:left"></td><td>(5.055)</td><td>(5.643)</td><td>(5.958)</td><td>(4.417)</td><td>(4.776)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>327</td><td>305</td><td>305</td><td>272</td><td>273</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>20.84</td><td>14.98</td><td>14.61</td><td>9.21</td><td>9.1</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.36</td><td>0.66</td><td>0.66</td><td>0.87</td><td>0.87</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>20.38</td><td>34.62</td><td>24.08</td><td>76.8</td><td>57.31</td></tr>
## <tr><td style="text-align:left">p-value</td><td>1e-04</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.od.nw.1
m2 <- mod.od.nw.2
m3 <- mod.od.nw.3
m4 <- mod.od.nw.4
m5 <- mod.od.nw.5
stargazer(m1,m2,m3,m4,m5, title="Newey-West Adjusted Linear Models of Orphan Drug Market Authorizations", align=TRUE, type = "html", out="Final Models Formatted/orphandrugMAmodels.htm",
covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
dep.var.labels=c("Annual Number of Orphan Drug Market Authorizations"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
##
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Linear Models of Orphan Drug Market Authorizations</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Orphan Drug Market Authorizations</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>32.278<sup>***</sup></td><td>32.278<sup>***</sup></td><td>32.232<sup>***</sup></td><td>-6.433</td><td>-5.690</td></tr>
## <tr><td style="text-align:left"></td><td>(9.925)</td><td>(10.074)</td><td>(10.022)</td><td>(7.438)</td><td>(7.997)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>2.794<sup>***</sup></td><td>2.892<sup>***</sup></td><td>0.517<sup>***</sup></td><td>0.604<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.376)</td><td>(0.289)</td><td>(0.166)</td><td>(0.177)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.567<sup>**</sup></td><td></td><td>0.243</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.277)</td><td></td><td>(0.150)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>4.554<sup>***</sup></td><td>4.465<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.708)</td><td>(0.766)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>11.389<sup>***</sup></td><td>-12.361</td><td>-15.015<sup>**</sup></td><td>6.994<sup>***</sup></td><td>5.476<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(1.397)</td><td>(7.519)</td><td>(6.144)</td><td>(1.958)</td><td>(1.817)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>327</td><td>305</td><td>305</td><td>272</td><td>273</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>20.84</td><td>14.98</td><td>14.61</td><td>9.21</td><td>9.1</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.36</td><td>0.66</td><td>0.66</td><td>0.87</td><td>0.87</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>20.38</td><td>34.62</td><td>24.08</td><td>76.8</td><td>57.31</td></tr>
## <tr><td style="text-align:left">p-value</td><td>1e-04</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
Poisson Models of Number of Orphan Drug Market Authorization
Models
Poisson Model 1 of Num of Annual Orphan Drug MAs
Newey-West adjustment for glms with panel data: Zeileis, Achim, and
Thomas Lumley. Package “Sandwich” Title Robust Covariance Matrix
Estimators. 15 Sept. 2020, pp. 4–6,
cran.r-project.org/web/packages/sandwich/sandwich.pdf.
Aghion P, Van Reenen J, Zingales L (2013). “Innovation and
Institutional Ownership.” The American Economic Review, 103(1), 277–304.
doi:10.1257/aer.103.1.277 https://www.aeaweb.org/articles?id=10.1257/aer.103.1.277
mod.pois.od.1 <- glm(freq_orphan_drug_mas ~ as.factor(area), data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.1)
##
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area), family = poisson(link = "log"),
## data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.43264 0.06984 34.83 <2e-16 ***
## as.factor(area)US 1.34395 0.07842 17.14 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 741.47 on 35 degrees of freedom
## Residual deviance: 378.01 on 34 degrees of freedom
## AIC: 554.08
##
## Number of Fisher Scoring iterations: 5
plot_lin_pois_od(mod.pois.od.1, "1", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.od.1.vcov <- vcovPL(mod.pois.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.1 <- coeftest(mod.pois.od.1, vcov = mod.pois.od.1.vcov)
mod.pois.od.nw.1
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.43264 0.12270 19.8260 < 2.2e-16 ***
## as.factor(area)US 1.34395 0.17073 7.8717 3.498e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.1)
## 2.5 % 97.5 %
## (Intercept) 2.192152 2.673124
## as.factor(area)US 1.009320 1.678574
mod.pois.od.1.metrics <- get_od_model_metrics(mod.pois.od.1, "Poisson")
mod.pois.od.1.metrics
Poisson Model 2 of Num of Annual Orphan Drug MAs
mod.pois.od.2 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.2)
##
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004,
## family = poisson(link = "log"), data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.365511 0.102632 13.30 <2e-16 ***
## as.factor(area)US 1.343947 0.078423 17.14 <2e-16 ***
## yearsafter2004 0.107718 0.006695 16.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 741.469 on 35 degrees of freedom
## Residual deviance: 95.044 on 33 degrees of freedom
## AIC: 273.11
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.2, "2", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.od.2.vcov <- vcovPL(mod.pois.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.2 <- coeftest(mod.pois.od.2, vcov = mod.pois.od.2.vcov)
mod.pois.od.nw.2
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.36551 0.22395 6.0974 1.078e-09 ***
## as.factor(area)US 1.34395 0.17330 7.7551 8.829e-15 ***
## yearsafter2004 0.10772 0.01183 9.1057 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.2)
## 2.5 % 97.5 %
## (Intercept) 0.92657995 1.8044420
## as.factor(area)US 1.00428727 1.6836064
## yearsafter2004 0.08453205 0.1309037
mod.pois.od.2.metrics <- get_od_model_metrics(mod.pois.od.2, "Poisson")
mod.pois.od.2.metrics
Poisson Model 3 of Num of Annual Orphan Drug MAs
mod.pois.od.3 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.3)
##
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## gdp_per_capita_annual_growthrate_usd, family = poisson(link = "log"),
## data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.348465 0.103085 13.08 <2e-16 ***
## as.factor(area)US 1.338517 0.078467 17.06 <2e-16 ***
## yearsafter2004 0.106749 0.006686 15.96 <2e-16 ***
## gdp_per_capita_annual_growthrate_usd 0.009457 0.007059 1.34 0.18
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 741.47 on 35 degrees of freedom
## Residual deviance: 93.24 on 32 degrees of freedom
## AIC: 273.3
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.3, "3", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.od.3.vcov <- vcovPL(mod.pois.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.3 <- coeftest(mod.pois.od.3, vcov = mod.pois.od.3.vcov)
mod.pois.od.nw.3
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.3484650 0.1964902 6.8628 6.754e-12 ***
## as.factor(area)US 1.3385166 0.1775705 7.5379 4.774e-14 ***
## yearsafter2004 0.1067490 0.0117507 9.0845 < 2.2e-16 ***
## gdp_per_capita_annual_growthrate_usd 0.0094568 0.0137365 0.6884 0.4912
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.3)
## 2.5 % 97.5 %
## (Intercept) 0.96335126 1.73357880
## as.factor(area)US 0.99048474 1.68654841
## yearsafter2004 0.08371814 0.12977990
## gdp_per_capita_annual_growthrate_usd -0.01746631 0.03637987
mod.pois.od.3.metrics <- get_od_model_metrics(mod.pois.od.3, "Poisson")
mod.pois.od.3.metrics
Poisson Model 4 of Num of Annual Orphan Drug MAs
mod.pois.od.4 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + as.factor(area):yearsafter2004, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.4)
##
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## as.factor(area):yearsafter2004, family = poisson(link = "log"),
## data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.01416 0.15033 13.399 < 2e-16 ***
## as.factor(area)US 0.48931 0.17884 2.736 0.006220 **
## yearsafter2004 0.04591 0.01369 3.353 0.000799 ***
## as.factor(area)US:yearsafter2004 0.07982 0.01573 5.074 3.9e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 741.469 on 35 degrees of freedom
## Residual deviance: 69.927 on 32 degrees of freedom
## AIC: 249.99
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.4, "4", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.od.4.vcov <- vcovPL(mod.pois.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.4 <- coeftest(mod.pois.od.4, vcov = mod.pois.od.4.vcov)
mod.pois.od.nw.4
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.014163 0.217571 9.2575 < 2.2e-16 ***
## as.factor(area)US 0.489305 0.210912 2.3200 0.020343 *
## yearsafter2004 0.045913 0.016889 2.7185 0.006557 **
## as.factor(area)US:yearsafter2004 0.079825 0.016730 4.7712 1.831e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.4)
## 2.5 % 97.5 %
## (Intercept) 1.58773263 2.44059407
## as.factor(area)US 0.07592652 0.90268446
## yearsafter2004 0.01281162 0.07901529
## as.factor(area)US:yearsafter2004 0.04703351 0.11261575
mod.pois.od.4.metrics <- get_od_model_metrics(mod.pois.od.4, "Poisson")
mod.pois.od.4.metrics
Poisson Model 5 of Num of Annual Orphan Drug MAs
mod.pois.od.5 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd , data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.5)
##
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd,
## family = poisson(link = "log"), data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.986725 0.153650 12.930 < 2e-16 ***
## as.factor(area)US 0.507058 0.179465 2.825 0.004722 **
## yearsafter2004 0.047062 0.013655 3.446 0.000568 ***
## gdp_per_capita_annual_growthrate_usd 0.005395 0.006887 0.783 0.433467
## as.factor(area)US:yearsafter2004 0.077857 0.015827 4.919 8.68e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 741.469 on 35 degrees of freedom
## Residual deviance: 69.311 on 31 degrees of freedom
## AIC: 251.38
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.5, "5", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.od.5.vcov <- vcovPL(mod.pois.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.5 <- coeftest(mod.pois.od.5, vcov = mod.pois.od.5.vcov)
mod.pois.od.nw.5
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.9867252 0.2009717 9.8856 < 2.2e-16 ***
## as.factor(area)US 0.5070576 0.2032372 2.4949 0.012599 *
## yearsafter2004 0.0470621 0.0164600 2.8592 0.004247 **
## gdp_per_capita_annual_growthrate_usd 0.0053946 0.0100360 0.5375 0.590905
## as.factor(area)US:yearsafter2004 0.0778573 0.0160970 4.8368 1.32e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.5)
## 2.5 % 97.5 %
## (Intercept) 1.59282782 2.38062260
## as.factor(area)US 0.10872005 0.90539517
## yearsafter2004 0.01480104 0.07932320
## gdp_per_capita_annual_growthrate_usd -0.01427566 0.02506492
## as.factor(area)US:yearsafter2004 0.04630775 0.10940681
mod.pois.od.5.metrics <- get_od_model_metrics(mod.pois.od.5, "Poisson")
mod.pois.od.5.metrics
mod.od.pois.metrics <- rbind(mod.pois.od.1.metrics, mod.pois.od.2.metrics, mod.pois.od.3.metrics, mod.pois.od.4.metrics, mod.pois.od.5.metrics)
mod.od.pois.metrics
mod.od.pois.metrics <- rbind(mod.pois.od.1.metrics, mod.pois.od.2.metrics, mod.pois.od.3.metrics, mod.pois.od.4.metrics, mod.pois.od.5.metrics)
aic_values <- round(mod.od.pois.metrics$aic)
rmse_values <- round(mod.od.pois.metrics$rmse, 2)
logLik_vlues <- round(mod.od.pois.metrics$LogLik, 2)
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.pois.od.1
m2 <- mod.pois.od.2
m3 <- mod.pois.od.3
m4 <- mod.pois.od.4
m5 <- mod.pois.od.5
stargazer(m1, m2, m3, m4, m5, title="Poisson Models of Annual Orphan Drug Market Authorizations (Not adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/PoissonorphandrugMAmodelsNonAdjusted.htm",
covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
dep.var.labels=c("Annual Number of Orphan Drug Market Authorizations"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", logLik_vlues)))
##
## <table style="text-align:center"><caption><strong>Poisson Models of Annual Orphan Drug Market Authorizations (Not adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Orphan Drug Market Authorizations</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>1.344<sup>***</sup></td><td>1.344<sup>***</sup></td><td>1.339<sup>***</sup></td><td>0.489<sup>***</sup></td><td>0.507<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.078)</td><td>(0.078)</td><td>(0.078)</td><td>(0.179)</td><td>(0.179)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>0.108<sup>***</sup></td><td>0.107<sup>***</sup></td><td>0.046<sup>***</sup></td><td>0.047<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.007)</td><td>(0.007)</td><td>(0.014)</td><td>(0.014)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.009</td><td></td><td>0.005</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.007)</td><td></td><td>(0.007)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>0.080<sup>***</sup></td><td>0.078<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.016)</td><td>(0.016)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>2.433<sup>***</sup></td><td>1.366<sup>***</sup></td><td>1.348<sup>***</sup></td><td>2.014<sup>***</sup></td><td>1.987<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.070)</td><td>(0.103)</td><td>(0.103)</td><td>(0.150)</td><td>(0.154)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>554</td><td>273</td><td>273</td><td>250</td><td>251</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>20.84</td><td>8.34</td><td>8.45</td><td>7.54</td><td>7.63</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-275.04</td><td>-133.55</td><td>-132.65</td><td>-121</td><td>-120.69</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.pois.od.nw.1
m2 <- mod.pois.od.nw.2
m3 <- mod.pois.od.nw.3
m4 <- mod.pois.od.nw.4
m5 <- mod.pois.od.nw.5
stargazer(m1,m2,m3,m4,m5, title="Newey-West Adjusted Poisson Models of Orphan Drug Market Authorizations", align=TRUE, type = "html", out="Final Models Formatted/PoissonorphandrugMAmodels.htm",
covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
dep.var.labels=c("Annual Number of Orphan Drug Market Authorizations"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", logLik_vlues)))
##
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Poisson Models of Orphan Drug Market Authorizations</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Orphan Drug Market Authorizations</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>1.344<sup>***</sup></td><td>1.344<sup>***</sup></td><td>1.339<sup>***</sup></td><td>0.489<sup>**</sup></td><td>0.507<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.171)</td><td>(0.173)</td><td>(0.178)</td><td>(0.211)</td><td>(0.203)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>0.108<sup>***</sup></td><td>0.107<sup>***</sup></td><td>0.046<sup>***</sup></td><td>0.047<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.012)</td><td>(0.012)</td><td>(0.017)</td><td>(0.016)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.009</td><td></td><td>0.005</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.014)</td><td></td><td>(0.010)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>0.080<sup>***</sup></td><td>0.078<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.017)</td><td>(0.016)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>2.433<sup>***</sup></td><td>1.366<sup>***</sup></td><td>1.348<sup>***</sup></td><td>2.014<sup>***</sup></td><td>1.987<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.123)</td><td>(0.224)</td><td>(0.196)</td><td>(0.218)</td><td>(0.201)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>554</td><td>273</td><td>273</td><td>250</td><td>251</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>20.84</td><td>8.34</td><td>8.45</td><td>7.54</td><td>7.63</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-275.04</td><td>-133.55</td><td>-132.65</td><td>-121</td><td>-120.69</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
Linear Models of Number of Market Authorizations for NEW Orphan
Drugs
Plot Models of Number of Orphan Drug Market Authorizations for NEW
Orphan Drugs (Linear or Poisson Models)
plot_lin_pois_new_od <- function(model, model_number, model_type){
#Plot residuals and acf/ pacf of residuals by panel (EU/US)
predicted <- predict(model , type = "response")
actual <- combined_rd_ma_df$freq_new_orphan_drug_mas
combined_rd_ma_df$residuals <- actual-predicted#resid(model)
par(mfrow =c(2,3))
us_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "US",]
eu_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "EU",]
plot(us_resid_data$year, us_resid_data$residuals, xlab="Year", main="US Residuals", ylab="Residuals")
acf(us_resid_data$residuals, main="ACF of US Residuals")
pacf(us_resid_data$residuals, main="PACF of US Residuals")
#qqnorm(us_resid_data$residuals, main = "QQ Plot of US Residuals")
plot(eu_resid_data$year, eu_resid_data$residuals, xlab="Year", main="EU Residuals", ylab="Residuals")
acf(eu_resid_data$residuals, main="ACF of EU Residuals")
pacf(eu_resid_data$residuals, main="PACF of EU Residuals")
#qqnorm(eu_resid_data$residuals, main = "QQ Plot of EU Residuals")
mtext(glue('{model_type} Model {model_number} of MAs for New Orphan Drugs - Residual Plots'), side = 3, line = -1.1, outer = TRUE)
ggplot(combined_rd_ma_df, aes(x = year, y = freq_new_orphan_drug_mas, color = area, alpha="Observed")) +
geom_point() +
labs(x = "Year",
y = str_wrap("Number of Market Authorizations for New Orphan Drugs", width = 40),
color = "Region",
alpha = " ",
title=str_wrap(glue("{model_type} Model {model_number} of Market Authorizations for New Orphan Drugs"), width = 60))+
scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
geom_line(aes(x=year, y=predicted, alpha = "Fitted"))+
scale_alpha_manual(values = c(0.5, 1))+
theme_minimal()
# https://aosmith.rbind.io/2020/07/09/ggplot2-override-aes/
#Controlling legend appearance in ggplot2 with override.aes
#July 9, 2020 · @aosmith16
# https://www.tidyverse.org/blog/2024/02/ggplot2-3-5-0-legends/
#Teun van den Brand
#2024/02/26
}
Get Metrics of Models of Number of Market Authorizations for NEW
Orphan Drugs (Linear or Poisson Models)
get_new_od_model_metrics <- function(model, model_type){
#Get RMSE
predicted <- predict(model, type = "response")
actual <- combined_rd_ma_df$freq_new_orphan_drug_mas
model_rmse <- Metrics::rmse(actual=actual, predicted=predicted)
model_df1 <- NA
model_df2 <- NA
model_adjRsq <- NA
model_fstat <- NA
model_pval <- NA
model_loglik <- NA
if(model_type == "Linear"){
model_df1 <- summary(model)$fstatistic[2]
model_df2 <- summary(model)$fstatistic[3]
model_fstat <- summary(model)$fstatistic[1]
model_pval <- pf(model_fstat, model_df1, model_df2, lower.tail = FALSE)
# https://www.reneshbedre.com/blog/get-f-stat-p-value-from-lm.html
# Renesh Bedre
# March 26, 2023
model_adjRsq <- summary(model)$adj.r.squared
}
if(model_type == "Poisson"){
model_loglik <- logLik(model)
}
model_aic <- AIC(model)
metrics_df <- data.frame("rmse" = model_rmse, "aic"= model_aic, "df1" = model_df1, "df2" = model_df2, "fstat" = model_fstat, "pval" = model_pval, "adjrsq" = model_adjRsq, "LogLik" = model_loglik)
rownames(metrics_df) <- NULL
return(metrics_df)
}
Linear Model 1 of Annual MAs for NEW orphan drugs
mod.new.od.1 <- lm(freq_new_orphan_drug_mas ~ as.factor(area), data=combined_rd_ma_df)
summary(mod.new.od.1)
##
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area), data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.667 -6.611 -1.667 4.347 25.333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.611 2.372 4.473 8.20e-05 ***
## as.factor(area)US 15.056 3.355 4.488 7.85e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.06 on 34 degrees of freedom
## Multiple R-squared: 0.372, Adjusted R-squared: 0.3535
## F-statistic: 20.14 on 1 and 34 DF, p-value: 7.85e-05
plot_lin_pois_new_od(mod.new.od.1, "1", "Linear")


#Estimate covariance matrix with Newey West
mod.new.od.1.vcov <- vcovPL(mod.new.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.1 <-coeftest(mod.new.od.1, vcov = mod.new.od.1.vcov)
mod.new.od.nw.1
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.6111 1.5389 6.8954 6.07e-08 ***
## as.factor(area)US 15.0556 3.4953 4.3074 0.0001331 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.1.metrics <- get_new_od_model_metrics(mod.new.od.1, "Linear")
confint(mod.new.od.nw.1)
## 2.5 % 97.5 %
## (Intercept) 7.483735 13.73849
## as.factor(area)US 7.952272 22.15884
Linear Model 2 of Annual MAs for NEW orphan drugs
mod.new.od.2 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004, data=combined_rd_ma_df)
summary(mod.new.od.2)
##
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.9653 -3.6720 -0.9102 3.6305 14.5400
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.621 2.381 -0.681 0.501
## as.factor(area)US 15.056 2.200 6.844 8.25e-08 ***
## yearsafter2004 1.439 0.212 6.788 9.69e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.6 on 33 degrees of freedom
## Multiple R-squared: 0.7379, Adjusted R-squared: 0.722
## F-statistic: 46.45 on 2 and 33 DF, p-value: 2.539e-10
plot_lin_pois_new_od(mod.new.od.2, "2", "Linear")


#Estimate covariance matrix with Newey West
mod.new.od.2.vcov <- vcovPL(mod.new.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.2 <-coeftest(mod.new.od.2, vcov = mod.new.od.2.vcov)
mod.new.od.nw.2
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.62135 2.67095 -0.6070 0.5479860
## as.factor(area)US 15.05556 3.54785 4.2436 0.0001677 ***
## yearsafter2004 1.43911 0.13805 10.4249 5.653e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.2.metrics <- get_new_od_model_metrics(mod.new.od.2, "Linear")
Linear Model 3 of Annual MAs for NEW orphan drugs
mod.new.od.3 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.new.od.3)
##
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## gdp_per_capita_annual_growthrate_usd, data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.2793 -4.3715 -0.0866 3.1109 15.3070
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.4307 2.5443 -0.955 0.347
## as.factor(area)US 15.0416 2.2053 6.821 1.03e-07 ***
## yearsafter2004 1.4690 0.2150 6.832 9.99e-08 ***
## gdp_per_capita_annual_growthrate_usd 0.1730 0.1886 0.917 0.366
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.616 on 32 degrees of freedom
## Multiple R-squared: 0.7446, Adjusted R-squared: 0.7207
## F-statistic: 31.1 on 3 and 32 DF, p-value: 1.318e-09
plot_lin_pois_new_od(mod.new.od.3, "3", "Linear")


#Estimate covariance matrix with Newey West
mod.new.od.3.vcov <- vcovPL(mod.new.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.3 <-coeftest(mod.new.od.3, vcov = mod.new.od.3.vcov)
mod.new.od.nw.3
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.43067 2.04420 -1.1891 0.2431670
## as.factor(area)US 15.04162 3.49062 4.3092 0.0001459 ***
## yearsafter2004 1.46901 0.12625 11.6357 4.919e-13 ***
## gdp_per_capita_annual_growthrate_usd 0.17301 0.17010 1.0171 0.3167178
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.3.metrics <- get_new_od_model_metrics(mod.new.od.3, "Linear")
confint(mod.new.od.nw.3)
## 2.5 % 97.5 %
## (Intercept) -6.5945632 1.7332329
## as.factor(area)US 7.9314571 22.1517802
## yearsafter2004 1.2118478 1.7261772
## gdp_per_capita_annual_growthrate_usd -0.1734628 0.5194828
Linear Model 4 of Annual MAs for NEW orphan drugs
mod.new.od.4 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + as.factor(area):yearsafter2004, data=combined_rd_ma_df)
summary(mod.new.od.4)
##
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## as.factor(area):yearsafter2004, data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.5191 -2.7497 0.2246 3.6370 8.5067
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.2164 2.2767 2.291 0.02868 *
## as.factor(area)US 1.3801 3.2197 0.429 0.67105
## yearsafter2004 0.6347 0.2286 2.776 0.00912 **
## as.factor(area)US:yearsafter2004 1.6089 0.3233 4.976 2.13e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.032 on 32 degrees of freedom
## Multiple R-squared: 0.8522, Adjusted R-squared: 0.8384
## F-statistic: 61.52 on 3 and 32 DF, p-value: 2.213e-13
plot_lin_pois_new_od(mod.new.od.4, "4", "Linear")


#Estimate covariance matrix with Newey West
mod.new.od.4.vcov <- vcovPL(mod.new.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.4 <-coeftest(mod.new.od.4, vcov = mod.new.od.4.vcov)
mod.new.od.nw.4
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.21637 1.45018 3.5971 0.00107 **
## as.factor(area)US 1.38012 2.39490 0.5763 0.56846
## yearsafter2004 0.63467 0.13336 4.7590 3.999e-05 ***
## as.factor(area)US:yearsafter2004 1.60888 0.24304 6.6198 1.827e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.4.metrics <- get_new_od_model_metrics(mod.new.od.4, "Linear")
confint(mod.new.od.nw.4)
## 2.5 % 97.5 %
## (Intercept) 2.2624561 8.1702924
## as.factor(area)US -3.4981245 6.2583584
## yearsafter2004 0.3630212 0.9063287
## as.factor(area)US:yearsafter2004 1.1138186 2.1039316
predicted <- predict(mod.new.od.4, type = "response")
# Plot of Annual Orphan Drug Market Authorizations by R&D spending
ggplot(combined_rd_ma_df, aes(x = annual_domestic_RD_spending_bil_dollars, y = freq_new_orphan_drug_mas, color = area, alpha="Observed")) +
geom_point() +
labs(x = "Domestic R&D Spending (Billion USD)",
y = str_wrap("Number of Market Authorizations for New Orphan Drugs", width = 40),
color = "Region",
alpha = "",
title=str_wrap(glue("Linear Model 4 of Market Authorizations for New Orphan Drugs by Domestic R&D Spending"), width = 60))+
scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
geom_line(aes(x=annual_domestic_RD_spending_bil_dollars, y=predicted, alpha = "Fitted"))+
scale_alpha_manual(values = c(0.5, 1))

theme_minimal()
## List of 136
## $ line :List of 6
## ..$ colour : chr "black"
## ..$ linewidth : num 0.5
## ..$ linetype : num 1
## ..$ lineend : chr "butt"
## ..$ arrow : logi FALSE
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_line" "element"
## $ rect :List of 5
## ..$ fill : chr "white"
## ..$ colour : chr "black"
## ..$ linewidth : num 0.5
## ..$ linetype : num 1
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_rect" "element"
## $ text :List of 11
## ..$ family : chr ""
## ..$ face : chr "plain"
## ..$ colour : chr "black"
## ..$ size : num 11
## ..$ hjust : num 0.5
## ..$ vjust : num 0.5
## ..$ angle : num 0
## ..$ lineheight : num 0.9
## ..$ margin : 'margin' num [1:4] 0points 0points 0points 0points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : logi FALSE
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ title : NULL
## $ aspect.ratio : NULL
## $ axis.title : NULL
## $ axis.title.x :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 1
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 2.75points 0points 0points 0points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.title.x.top :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 0
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0points 0points 2.75points 0points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.title.x.bottom : NULL
## $ axis.title.y :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 1
## ..$ angle : num 90
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0points 2.75points 0points 0points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.title.y.left : NULL
## $ axis.title.y.right :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 1
## ..$ angle : num -90
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0points 0points 0points 2.75points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : chr "grey30"
## ..$ size : 'rel' num 0.8
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.x :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 1
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 2.2points 0points 0points 0points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.x.top :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : NULL
## ..$ vjust : num 0
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0points 0points 2.2points 0points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.x.bottom : NULL
## $ axis.text.y :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 1
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0points 2.2points 0points 0points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.y.left : NULL
## $ axis.text.y.right :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 0
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0points 0points 0points 2.2points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.text.theta : NULL
## $ axis.text.r :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 0.5
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : 'margin' num [1:4] 0points 2.2points 0points 2.2points
## .. ..- attr(*, "unit")= int 8
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ axis.ticks : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ axis.ticks.x : NULL
## $ axis.ticks.x.top : NULL
## $ axis.ticks.x.bottom : NULL
## $ axis.ticks.y : NULL
## $ axis.ticks.y.left : NULL
## $ axis.ticks.y.right : NULL
## $ axis.ticks.theta : NULL
## $ axis.ticks.r : NULL
## $ axis.minor.ticks.x.top : NULL
## $ axis.minor.ticks.x.bottom : NULL
## $ axis.minor.ticks.y.left : NULL
## $ axis.minor.ticks.y.right : NULL
## $ axis.minor.ticks.theta : NULL
## $ axis.minor.ticks.r : NULL
## $ axis.ticks.length : 'simpleUnit' num 2.75points
## ..- attr(*, "unit")= int 8
## $ axis.ticks.length.x : NULL
## $ axis.ticks.length.x.top : NULL
## $ axis.ticks.length.x.bottom : NULL
## $ axis.ticks.length.y : NULL
## $ axis.ticks.length.y.left : NULL
## $ axis.ticks.length.y.right : NULL
## $ axis.ticks.length.theta : NULL
## $ axis.ticks.length.r : NULL
## $ axis.minor.ticks.length : 'rel' num 0.75
## $ axis.minor.ticks.length.x : NULL
## $ axis.minor.ticks.length.x.top : NULL
## $ axis.minor.ticks.length.x.bottom: NULL
## $ axis.minor.ticks.length.y : NULL
## $ axis.minor.ticks.length.y.left : NULL
## $ axis.minor.ticks.length.y.right : NULL
## $ axis.minor.ticks.length.theta : NULL
## $ axis.minor.ticks.length.r : NULL
## $ axis.line : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ axis.line.x : NULL
## $ axis.line.x.top : NULL
## $ axis.line.x.bottom : NULL
## $ axis.line.y : NULL
## $ axis.line.y.left : NULL
## $ axis.line.y.right : NULL
## $ axis.line.theta : NULL
## $ axis.line.r : NULL
## $ legend.background : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ legend.margin : 'margin' num [1:4] 5.5points 5.5points 5.5points 5.5points
## ..- attr(*, "unit")= int 8
## $ legend.spacing : 'simpleUnit' num 11points
## ..- attr(*, "unit")= int 8
## $ legend.spacing.x : NULL
## $ legend.spacing.y : NULL
## $ legend.key : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ legend.key.size : 'simpleUnit' num 1.2lines
## ..- attr(*, "unit")= int 3
## $ legend.key.height : NULL
## $ legend.key.width : NULL
## $ legend.key.spacing : 'simpleUnit' num 5.5points
## ..- attr(*, "unit")= int 8
## $ legend.key.spacing.x : NULL
## $ legend.key.spacing.y : NULL
## $ legend.frame : NULL
## $ legend.ticks : NULL
## $ legend.ticks.length : 'rel' num 0.2
## $ legend.axis.line : NULL
## $ legend.text :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : 'rel' num 0.8
## ..$ hjust : NULL
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ legend.text.position : NULL
## $ legend.title :List of 11
## ..$ family : NULL
## ..$ face : NULL
## ..$ colour : NULL
## ..$ size : NULL
## ..$ hjust : num 0
## ..$ vjust : NULL
## ..$ angle : NULL
## ..$ lineheight : NULL
## ..$ margin : NULL
## ..$ debug : NULL
## ..$ inherit.blank: logi TRUE
## ..- attr(*, "class")= chr [1:2] "element_text" "element"
## $ legend.title.position : NULL
## $ legend.position : chr "right"
## $ legend.position.inside : NULL
## $ legend.direction : NULL
## $ legend.byrow : NULL
## $ legend.justification : chr "center"
## $ legend.justification.top : NULL
## $ legend.justification.bottom : NULL
## $ legend.justification.left : NULL
## $ legend.justification.right : NULL
## $ legend.justification.inside : NULL
## $ legend.location : NULL
## $ legend.box : NULL
## $ legend.box.just : NULL
## $ legend.box.margin : 'margin' num [1:4] 0cm 0cm 0cm 0cm
## ..- attr(*, "unit")= int 1
## $ legend.box.background : list()
## ..- attr(*, "class")= chr [1:2] "element_blank" "element"
## $ legend.box.spacing : 'simpleUnit' num 11points
## ..- attr(*, "unit")= int 8
## [list output truncated]
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi TRUE
## - attr(*, "validate")= logi TRUE
Linear Model 5 of Annual MAs for NEW orphan drugs
mod.new.od.5 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.new.od.5)
##
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd,
## data = combined_rd_ma_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.439 -2.862 0.288 3.660 8.842
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.8560 2.4843 1.955 0.05969 .
## as.factor(area)US 1.5564 3.2941 0.472 0.63990
## yearsafter2004 0.6553 0.2376 2.758 0.00967 **
## gdp_per_capita_annual_growthrate_usd 0.0577 0.1474 0.391 0.69814
## as.factor(area)US:yearsafter2004 1.5876 0.3322 4.779 4.04e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.1 on 31 degrees of freedom
## Multiple R-squared: 0.853, Adjusted R-squared: 0.834
## F-statistic: 44.96 on 4 and 31 DF, p-value: 1.77e-12
plot_lin_pois_new_od(mod.new.od.5, "5", "Linear")


#Estimate covariance matrix with Newey West
mod.new.od.5.vcov <- vcovPL(mod.new.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.5 <-coeftest(mod.new.od.5, vcov = mod.new.od.5.vcov)
mod.new.od.nw.5
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.85601 1.44648 3.3571 0.002096 **
## as.factor(area)US 1.55638 2.48325 0.6268 0.535411
## yearsafter2004 0.65529 0.14399 4.5509 7.746e-05 ***
## gdp_per_capita_annual_growthrate_usd 0.05770 0.13160 0.4385 0.664093
## as.factor(area)US:yearsafter2004 1.58759 0.26367 6.0212 1.155e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.5.metrics <- get_new_od_model_metrics(mod.new.od.5, "Linear")
confint(mod.new.od.nw.5)
## 2.5 % 97.5 %
## (Intercept) 1.9058856 7.8061280
## as.factor(area)US -3.5082423 6.6209986
## yearsafter2004 0.3616198 0.9489572
## gdp_per_capita_annual_growthrate_usd -0.2106931 0.3260928
## as.factor(area)US:yearsafter2004 1.0498405 2.1253429
predicted <- predict(mod.new.od.5, type = "response")
# Plot of Annual Orphan Drug Market Authorizations by R&D spending
ggplot(combined_rd_ma_df, aes(x = annual_domestic_RD_spending_bil_dollars, y = freq_new_orphan_drug_mas, color = area, alpha="Observed")) +
geom_point() +
labs(x = "Domestic R&D Spending (Billion USD)",
y = str_wrap("Number of Market Authorizations for New Orphan Drugs", width = 40),
color = "Region",
alpha = "",
title=str_wrap(glue("Linear Model 5 of Market Authorizations for New Orphan Drugs by Domestic R&D Spending"), width = 60))+
scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
geom_line(aes(x=annual_domestic_RD_spending_bil_dollars, y=predicted, alpha = "Fitted"))+
scale_alpha_manual(values = c(0.5, 1))+
theme_minimal()

mod.new.od.metrics <- rbind(mod.new.od.1.metrics, mod.new.od.2.metrics, mod.new.od.3.metrics, mod.new.od.4.metrics, mod.new.od.5.metrics)
aic_values <- round(mod.new.od.metrics$aic)
rmse_values <- round(mod.new.od.metrics$rmse, 2)
fstat_values <- round(mod.new.od.metrics$fstat, 2)
p_values <- round(mod.new.od.metrics$pval, 4)
adjrsq_values <- round(mod.new.od.metrics$adjrsq, 2)
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.new.od.1
m2 <- mod.new.od.2
m3 <- mod.new.od.3
m4 <- mod.new.od.4
m5 <- mod.new.od.5
stargazer(m1, m2, m3, m4, m5, title="Linear Models of Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/NEWorphandrugMAmodelsNonAdjusted.htm",
covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
##
## <table style="text-align:center"><caption><strong>Linear Models of Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>15.056<sup>***</sup></td><td>15.056<sup>***</sup></td><td>15.042<sup>***</sup></td><td>1.380</td><td>1.556</td></tr>
## <tr><td style="text-align:left"></td><td>(3.355)</td><td>(2.200)</td><td>(2.205)</td><td>(3.220)</td><td>(3.294)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>1.439<sup>***</sup></td><td>1.469<sup>***</sup></td><td>0.635<sup>***</sup></td><td>0.655<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.212)</td><td>(0.215)</td><td>(0.229)</td><td>(0.238)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.173</td><td></td><td>0.058</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.189)</td><td></td><td>(0.147)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>1.609<sup>***</sup></td><td>1.588<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.323)</td><td>(0.332)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>10.611<sup>***</sup></td><td>-1.621</td><td>-2.431</td><td>5.216<sup>**</sup></td><td>4.856<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(2.372)</td><td>(2.381)</td><td>(2.544)</td><td>(2.277)</td><td>(2.484)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>272</td><td>243</td><td>244</td><td>224</td><td>226</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>9.78</td><td>6.32</td><td>6.24</td><td>4.74</td><td>4.73</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.35</td><td>0.72</td><td>0.72</td><td>0.84</td><td>0.83</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>20.14</td><td>46.45</td><td>31.1</td><td>61.52</td><td>44.96</td></tr>
## <tr><td style="text-align:left">p-value</td><td>1e-04</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.new.od.nw.1
m2 <- mod.new.od.nw.2
m3 <- mod.new.od.nw.3
m4 <- mod.new.od.nw.4
m5 <- mod.new.od.nw.5
stargazer(m1, m2, m3, m4, m5, title="Newey-West Adjusted Linear Models of Market Authorizations for New Orphan Drugs", align=TRUE, type = "html", out="Final Models Formatted/NEWorphandrugMAmodels.htm",
covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
##
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Linear Models of Market Authorizations for New Orphan Drugs</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>15.056<sup>***</sup></td><td>15.056<sup>***</sup></td><td>15.042<sup>***</sup></td><td>1.380</td><td>1.556</td></tr>
## <tr><td style="text-align:left"></td><td>(3.495)</td><td>(3.548)</td><td>(3.491)</td><td>(2.395)</td><td>(2.483)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>1.439<sup>***</sup></td><td>1.469<sup>***</sup></td><td>0.635<sup>***</sup></td><td>0.655<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.138)</td><td>(0.126)</td><td>(0.133)</td><td>(0.144)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.173</td><td></td><td>0.058</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.170)</td><td></td><td>(0.132)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>1.609<sup>***</sup></td><td>1.588<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.243)</td><td>(0.264)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>10.611<sup>***</sup></td><td>-1.621</td><td>-2.431</td><td>5.216<sup>***</sup></td><td>4.856<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(1.539)</td><td>(2.671)</td><td>(2.044)</td><td>(1.450)</td><td>(1.446)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>272</td><td>243</td><td>244</td><td>224</td><td>226</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>9.78</td><td>6.32</td><td>6.24</td><td>4.74</td><td>4.73</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.35</td><td>0.72</td><td>0.72</td><td>0.84</td><td>0.83</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>20.14</td><td>46.45</td><td>31.1</td><td>61.52</td><td>44.96</td></tr>
## <tr><td style="text-align:left">p-value</td><td>1e-04</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
Overall mean of Domestic R&D spending
mean(combined_rd_ma_df$annual_domestic_RD_spending_bil_dollars)
## [1] 42.29366
Overall mean of US Domestic R&D spending
mean(subset(combined_rd_ma_df ,area=="US")$annual_domestic_RD_spending_bil_dollars)
## [1] 46.25642
Overall mean of annual EU Domestic R&D spending
mean(subset(combined_rd_ma_df ,area=="EU")$annual_domestic_RD_spending_bil_dollars)
## [1] 38.3309
Overall mean of annual number of US Market Authroized Orphan
Drugs
mean(subset(combined_rd_ma_df ,area=="US")$freq_orphan_drug_mas)
## [1] 43.66667
mean(subset(combined_rd_ma_df ,area=="US")$freq_new_orphan_drug_mas)
## [1] 25.66667
Overall mean of annual number of EU Market Authroized Orphan
Drugs
mean(subset(combined_rd_ma_df ,area=="EU")$freq_orphan_drug_mas)
## [1] 11.38889
mean(subset(combined_rd_ma_df ,area=="EU")$freq_new_orphan_drug_mas)
## [1] 10.61111
Poisson Models for Number of Market Authorizations for NEW Orphan
Drugs
Poisson model 1 of Annual Num of MAs for NEW orphan drugs
mod.pois.new.od.1 <- glm(freq_new_orphan_drug_mas ~ as.factor(area), data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.1)
##
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area), family = poisson(link = "log"),
## data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.36190 0.07236 32.64 <2e-16 ***
## as.factor(area)US 0.88329 0.08602 10.27 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 275.24 on 35 degrees of freedom
## Residual deviance: 159.30 on 34 degrees of freedom
## AIC: 326.27
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.1, "1", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.new.od.1.vcov <- vcovPL(mod.pois.new.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.1 <- coeftest(mod.pois.new.od.1, vcov = mod.pois.new.od.1.vcov)
mod.pois.new.od.nw.1
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.361902 0.145025 16.2862 < 2.2e-16 ***
## as.factor(area)US 0.883291 0.093369 9.4603 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.1.metrics <- get_new_od_model_metrics(mod.pois.new.od.1, "Poisson")
mod.pois.new.od.1.metrics
confint(mod.pois.new.od.nw.1)
## 2.5 % 97.5 %
## (Intercept) 2.0776576 2.646146
## as.factor(area)US 0.7002924 1.066291
Poisson model 2 of Annual Num of MAs for NEW orphan drugs
mod.pois.new.od.2 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.2)
##
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004,
## family = poisson(link = "log"), data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.573894 0.111327 14.14 <2e-16 ***
## as.factor(area)US 0.883291 0.086024 10.27 <2e-16 ***
## yearsafter2004 0.082198 0.007955 10.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 275.236 on 35 degrees of freedom
## Residual deviance: 46.701 on 33 degrees of freedom
## AIC: 215.67
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.2, "2", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.new.od.2.vcov <- vcovPL(mod.pois.new.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.2 <- coeftest(mod.pois.new.od.2, vcov = mod.pois.new.od.2.vcov)
mod.pois.new.od.nw.2
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.5738944 0.1099422 14.3157 < 2.2e-16 ***
## as.factor(area)US 0.8832915 0.0947727 9.3201 < 2.2e-16 ***
## yearsafter2004 0.0821981 0.0065146 12.6175 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.2.metrics <- get_new_od_model_metrics(mod.pois.new.od.2, "Poisson")
mod.pois.new.od.2.metrics
confint(mod.pois.new.od.nw.2)
## 2.5 % 97.5 %
## (Intercept) 1.35841158 1.78937715
## as.factor(area)US 0.69754035 1.06904257
## yearsafter2004 0.06942973 0.09496652
Poisson model 3 of Annual Num of MAs for NEW orphan drugs
mod.pois.new.od.3 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.3)
##
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## gdp_per_capita_annual_growthrate_usd, family = poisson(link = "log"),
## data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.5723230 0.1127702 13.943 <2e-16 ***
## as.factor(area)US 0.8828314 0.0861801 10.244 <2e-16 ***
## yearsafter2004 0.0821682 0.0079577 10.326 <2e-16 ***
## gdp_per_capita_annual_growthrate_usd 0.0006891 0.0079618 0.087 0.931
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 275.236 on 35 degrees of freedom
## Residual deviance: 46.693 on 32 degrees of freedom
## AIC: 217.66
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.3, "3", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.new.od.3.vcov <- vcovPL(mod.pois.new.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.3 <- coeftest(mod.pois.new.od.3, vcov = mod.pois.new.od.3.vcov)
mod.pois.new.od.nw.3
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.57232299 0.10319966 15.2357 <2e-16 ***
## as.factor(area)US 0.88283138 0.10090233 8.7494 <2e-16 ***
## yearsafter2004 0.08216822 0.00652632 12.5903 <2e-16 ***
## gdp_per_capita_annual_growthrate_usd 0.00068905 0.01170778 0.0589 0.9531
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.3.metrics <- get_new_od_model_metrics(mod.pois.new.od.3, "Poisson")
mod.pois.new.od.3.metrics
confint(mod.pois.new.od.nw.3)
## 2.5 % 97.5 %
## (Intercept) 1.37005537 1.77459062
## as.factor(area)US 0.68506645 1.08059631
## yearsafter2004 0.06937687 0.09495957
## gdp_per_capita_annual_growthrate_usd -0.02225778 0.02363589
Poisson model 4 of Annual Num of MAs for NEW orphan drugs
mod.pois.new.od.4 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + as.factor(area):yearsafter2004, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.4)
##
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## as.factor(area):yearsafter2004, family = poisson(link = "log"),
## data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.79374 0.16228 11.053 < 2e-16 ***
## as.factor(area)US 0.56583 0.19821 2.855 0.00431 **
## yearsafter2004 0.06101 0.01437 4.246 2.17e-05 ***
## as.factor(area)US:yearsafter2004 0.03028 0.01726 1.754 0.07948 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 275.236 on 35 degrees of freedom
## Residual deviance: 43.649 on 32 degrees of freedom
## AIC: 214.62
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.4, "4", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.new.od.4.vcov <- vcovPL(mod.pois.new.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.4 <- coeftest(mod.pois.new.od.4, vcov = mod.pois.new.od.4.vcov)
mod.pois.new.od.nw.4
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.793738 0.180770 9.9228 < 2.2e-16 ***
## as.factor(area)US 0.565835 0.197166 2.8698 0.004107 **
## yearsafter2004 0.061008 0.014373 4.2446 2.19e-05 ***
## as.factor(area)US:yearsafter2004 0.030275 0.014985 2.0203 0.043350 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.4.metrics <- get_new_od_model_metrics(mod.pois.new.od.4, "Poisson")
mod.pois.new.od.4.metrics
confint(mod.pois.new.od.nw.4)
## 2.5 % 97.5 %
## (Intercept) 1.4394360681 2.14803953
## as.factor(area)US 0.1793969585 0.95227281
## yearsafter2004 0.0328372741 0.08917868
## as.factor(area)US:yearsafter2004 0.0009044294 0.05964557
Poisson model 5 of Annual Num of MAs for NEW orphan drugs
mod.pois.new.od.5 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.5)
##
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 +
## yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd,
## family = poisson(link = "log"), data = combined_rd_ma_df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.7971136 0.1660656 10.822 < 2e-16 ***
## as.factor(area)US 0.5641872 0.1990723 2.834 0.0046 **
## yearsafter2004 0.0608844 0.0144412 4.216 2.49e-05 ***
## gdp_per_capita_annual_growthrate_usd -0.0007685 0.0079038 -0.097 0.9225
## as.factor(area)US:yearsafter2004 0.0304818 0.0174101 1.751 0.0800 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 275.236 on 35 degrees of freedom
## Residual deviance: 43.639 on 31 degrees of freedom
## AIC: 216.61
##
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.5, "5", "Poisson")


#Estimate covariance matrix with Newey West
mod.pois.new.od.5.vcov <- vcovPL(mod.pois.new.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")
#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.5 <- coeftest(mod.pois.new.od.5, vcov = mod.pois.new.od.5.vcov)
mod.pois.new.od.nw.5
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.79711364 0.17974494 9.9981 < 2.2e-16
## as.factor(area)US 0.56418722 0.19477316 2.8966 0.003772
## yearsafter2004 0.06088440 0.01473714 4.1314 3.606e-05
## gdp_per_capita_annual_growthrate_usd -0.00076852 0.01037691 -0.0741 0.940962
## as.factor(area)US:yearsafter2004 0.03048184 0.01482775 2.0557 0.039809
##
## (Intercept) ***
## as.factor(area)US **
## yearsafter2004 ***
## gdp_per_capita_annual_growthrate_usd
## as.factor(area)US:yearsafter2004 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.5.metrics <- get_new_od_model_metrics(mod.pois.new.od.5, "Poisson")
mod.pois.new.od.5.metrics
confint(mod.pois.new.od.nw.5)
## 2.5 % 97.5 %
## (Intercept) 1.444820030 2.14940725
## as.factor(area)US 0.182438843 0.94593560
## yearsafter2004 0.032000135 0.08976867
## gdp_per_capita_annual_growthrate_usd -0.021106886 0.01956984
## as.factor(area)US:yearsafter2004 0.001419984 0.05954369
mod.pois.new.od.metrics <- rbind(mod.pois.new.od.1.metrics, mod.pois.new.od.2.metrics, mod.pois.new.od.3.metrics, mod.pois.new.od.4.metrics, mod.pois.new.od.5.metrics)
aic_values <- round(mod.pois.new.od.metrics$aic)
rmse_values <- round(mod.pois.new.od.metrics$rmse, 2)
loglik_values <- round(mod.pois.new.od.metrics$LogLik, 2)
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.pois.new.od.1
m2 <- mod.pois.new.od.2
m3 <- mod.pois.new.od.3
m4 <- mod.pois.new.od.4
m5 <- mod.pois.new.od.5
stargazer(m1, m2, m3, m4, m5, title="Poisson Models of Annual Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/PoissonNEWorphandrugMAmodelsNonAdjusted.htm",
covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", loglik_values)))
##
## <table style="text-align:center"><caption><strong>Poisson Models of Annual Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>0.883<sup>***</sup></td><td>0.883<sup>***</sup></td><td>0.883<sup>***</sup></td><td>0.566<sup>***</sup></td><td>0.564<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.086)</td><td>(0.086)</td><td>(0.086)</td><td>(0.198)</td><td>(0.199)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>0.082<sup>***</sup></td><td>0.082<sup>***</sup></td><td>0.061<sup>***</sup></td><td>0.061<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.008)</td><td>(0.008)</td><td>(0.014)</td><td>(0.014)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.001</td><td></td><td>-0.001</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.008)</td><td></td><td>(0.008)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>0.030<sup>*</sup></td><td>0.030<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.017)</td><td>(0.017)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>2.362<sup>***</sup></td><td>1.574<sup>***</sup></td><td>1.572<sup>***</sup></td><td>1.794<sup>***</sup></td><td>1.797<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.072)</td><td>(0.111)</td><td>(0.113)</td><td>(0.162)</td><td>(0.166)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>326</td><td>216</td><td>218</td><td>215</td><td>217</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>9.78</td><td>4.7</td><td>4.72</td><td>4.59</td><td>4.57</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-161.13</td><td>-104.84</td><td>-104.83</td><td>-103.31</td><td>-103.3</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.pois.new.od.nw.1
m2 <- mod.pois.new.od.nw.2
m3 <- mod.pois.new.od.nw.3
m4 <- mod.pois.new.od.nw.4
m5 <- mod.pois.new.od.nw.5
stargazer(m1,m2,m3,m4,m5, title="Newey-West Adjusted Poisson Models of Annual Market Authorizations for New Orphan Drugs ", align=TRUE, type = "html", out="Final Models Formatted/PoissonNEWorphandrugMAmodels.htm",
covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
model.numbers=FALSE,
omit.stat = c("all"),
add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", loglik_values)))
##
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Poisson Models of Annual Market Authorizations for New Orphan Drugs</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>0.883<sup>***</sup></td><td>0.883<sup>***</sup></td><td>0.883<sup>***</sup></td><td>0.566<sup>***</sup></td><td>0.564<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.093)</td><td>(0.095)</td><td>(0.101)</td><td>(0.197)</td><td>(0.195)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>0.082<sup>***</sup></td><td>0.082<sup>***</sup></td><td>0.061<sup>***</sup></td><td>0.061<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.007)</td><td>(0.007)</td><td>(0.014)</td><td>(0.015)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.001</td><td></td><td>-0.001</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.012)</td><td></td><td>(0.010)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>0.030<sup>**</sup></td><td>0.030<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.015)</td><td>(0.015)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>2.362<sup>***</sup></td><td>1.574<sup>***</sup></td><td>1.572<sup>***</sup></td><td>1.794<sup>***</sup></td><td>1.797<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.145)</td><td>(0.110)</td><td>(0.103)</td><td>(0.181)</td><td>(0.180)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>326</td><td>216</td><td>218</td><td>215</td><td>217</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>9.78</td><td>4.7</td><td>4.72</td><td>4.59</td><td>4.57</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-161.13</td><td>-104.84</td><td>-104.83</td><td>-103.31</td><td>-103.3</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>